解説記事:Tesseract

最終更新日:

Tesseract についての簡単な紹介。


環境設定

ファイルダウンロード

# Component versions per target OS:
# Ubuntu 16.04 LTS : Tesseract 3.04.01, Leptonica 1.73, libgif 5.1.2, libjpeg 8d, libpng 1.2.54, libtiff 4.0.6, zlib 1.2.8, libwebp 0.4.4, libopenjp2 2.1.2
# Raspbian stretch : Tesseract 3.04.01, Leptonica 1.74.1, libgif 5.1.4, libjpeg 6b, libpng 1.6.28, libtiff 4.0.8, zlib 1.2.8, libwebp 0.5.2, libopenjp2 2.1.2
# NOTE(review): the Ubuntu line lists libjpeg 8d, but the commands below fetch jpegsrc.v6b - confirm which is intended.

#--------------------------------------------------------
# Ubuntu 16.04 LTS
#--------------------------------------------------------
# Downloads and unpacks every source archive into the current directory.
# Run this from the directory that the build step uses as SRC_TOP_FOLDER.
# Each archive is extracted only when its download succeeded, so a failed
# wget is never followed by tar on an empty or partial file.

#libtool
wget https://ftp.gnu.org/gnu/libtool/libtool-2.4.6.tar.gz &&
  tar zxvf libtool-2.4.6.tar.gz

#zlib
# -O gives the GitHub tag archive (v1.2.8.tar.gz) a descriptive local name.
wget https://github.com/madler/zlib/archive/v1.2.8.tar.gz -O zlib-1.2.8.tar.gz &&
  tar zxvf zlib-1.2.8.tar.gz

#libpng
wget https://sourceforge.net/projects/libpng/files/libpng12/older-releases/1.2.54/libpng-1.2.54.tar.gz/download -O libpng-1.2.54.tar.gz &&
  tar zxvf libpng-1.2.54.tar.gz

#libjpeg
wget http://www.ijg.org/files/jpegsrc.v6b.tar.gz &&
  tar zxvf jpegsrc.v6b.tar.gz

#libgif
wget https://sourceforge.net/projects/giflib/files/giflib-5.1.2.tar.gz/download -O giflib-5.1.2.tar.gz &&
  tar zxvf giflib-5.1.2.tar.gz

#libtiff
wget http://download.osgeo.org/libtiff/tiff-4.0.6.tar.gz &&
  tar zxvf tiff-4.0.6.tar.gz

#Leptonica
wget https://github.com/DanBloomberg/leptonica/archive/v1.73.tar.gz -O leptonica-1.73.tar.gz &&
  tar zxvf leptonica-1.73.tar.gz

#Tesseract
wget https://github.com/tesseract-ocr/tesseract/archive/3.04.01.tar.gz -O tesseract-3.04.01.tar.gz &&
  tar zxvf tesseract-3.04.01.tar.gz

#--------------------------------------------------------
# Tesseract Training Data for 3.04.xx
#--------------------------------------------------------
# Language data files (English, Japanese, orientation/script detection),
# taken from the 3.04.00 tag of the tessdata repository.
wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/eng.traineddata
wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/jpn.traineddata
wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/osd.traineddata
		

ビルド

# Component versions per target OS:
# Ubuntu 16.04 LTS : Tesseract 3.04.01, Leptonica 1.73, libgif 5.1.2, libjpeg 8d, libpng 1.2.54, libtiff 4.0.6, zlib 1.2.8, libwebp 0.4.4, libopenjp2 2.1.2
# Raspbian stretch : Tesseract 3.04.01, Leptonica 1.74.1, libgif 5.1.4, libjpeg 6b, libpng 1.6.28, libtiff 4.0.8, zlib 1.2.8, libwebp 0.5.2, libopenjp2 2.1.2
# NOTE(review): the Ubuntu line lists libjpeg 8d, but the steps below build jpeg-6b - confirm which is intended.

#--------------------------------------------------------
# Ubuntu 16.04 LTS
#--------------------------------------------------------
# Builds each package from its unpacked source tree under $SRC_TOP_FOLDER and
# installs it into its own prefix under $SRC_TOP_FOLDER/BuildFolder.
# Every source directory is entered by absolute path, and the steps of one
# package are chained with && so a failed step is not followed by the next one.

# Directory holding the unpacked sources and the downloaded *.traineddata files.
export SRC_TOP_FOLDER=/home/username/OCR
# Root of all per-package install prefixes.
BUILD_TOP="$SRC_TOP_FOLDER/BuildFolder"
# Put the libtool built below ahead of any other libtool on PATH.
export PATH="$BUILD_TOP/libtool-2.4.6/bin:$PATH"

# -p: no error when the directory already exists (e.g. on a re-run).
mkdir -p "$BUILD_TOP"
cd "$SRC_TOP_FOLDER"

#libtool
mkdir -p "$BUILD_TOP/libtool-2.4.6"
cd "$SRC_TOP_FOLDER/libtool-2.4.6" &&
  ./configure --prefix="$BUILD_TOP/libtool-2.4.6" &&
  make &&
  make install

#zlib
# Static library only, compiled as position-independent code.
# zlib's configure applies its default optimisation level only when CFLAGS is
# unset, so -O3 is stated explicitly next to -fPIC.
mkdir -p "$BUILD_TOP/zlib-1.2.8"
cd "$SRC_TOP_FOLDER/zlib-1.2.8" &&
  CFLAGS="-O3 -fPIC" ./configure --prefix="$BUILD_TOP/zlib-1.2.8" --static &&
  make &&
  make install

#libpng
mkdir -p "$BUILD_TOP/libpng-1.2.54"
cd "$SRC_TOP_FOLDER/libpng-1.2.54" &&
  ./configure --prefix="$BUILD_TOP/libpng-1.2.54" --enable-static &&
  make &&
  make install

#libjpeg
# The install targets of jpeg-6b copy into bin, lib, include and man/man1;
# those directories are created up front.
mkdir -p "$BUILD_TOP/jpeg-6b/bin" \
         "$BUILD_TOP/jpeg-6b/lib" \
         "$BUILD_TOP/jpeg-6b/include" \
         "$BUILD_TOP/jpeg-6b/man/man1"
# As with zlib, an explicit CFLAGS replaces the configure default, so the
# optimisation level is given together with -fPIC.
cd "$SRC_TOP_FOLDER/jpeg-6b" &&
  CFLAGS="-O2 -fPIC" ./configure --prefix="$BUILD_TOP/jpeg-6b" &&
  make &&
  make install &&
  make install-lib &&
  make install-headers

#libgif
mkdir -p "$BUILD_TOP/giflib-5.1.2"
cd "$SRC_TOP_FOLDER/giflib-5.1.2" &&
  ./configure --prefix="$BUILD_TOP/giflib-5.1.2" --enable-static &&
  make &&
  make install

#libtiff
mkdir -p "$BUILD_TOP/tiff-4.0.6"
cd "$SRC_TOP_FOLDER/tiff-4.0.6" &&
  ./configure --prefix="$BUILD_TOP/tiff-4.0.6" --enable-static &&
  make &&
  make install

#Leptonica
# Point configure at the image libraries installed above: one -L (library
# directory) and one -I (header directory) per dependency prefix.
lept_ldflags=
lept_cppflags=
for dep in tiff-4.0.6 giflib-5.1.2 jpeg-6b libpng-1.2.54 zlib-1.2.8 libtool-2.4.6; do
  lept_ldflags="$lept_ldflags -L$BUILD_TOP/$dep/lib"
  lept_cppflags="$lept_cppflags -I$BUILD_TOP/$dep/include"
done
mkdir -p "$BUILD_TOP/leptonica-1.73"
# Earlier variant without explicit dependency paths, kept for reference:
#./configure --prefix=$SRC_TOP_FOLDER/BuildFolder/leptonica-1.73 --enable-static
# The include paths go into CPPFLAGS rather than CFLAGS so that configure's
# default compiler flags (optimisation) stay in effect.
# chmod: the configure script in this archive is not marked executable.
cd "$SRC_TOP_FOLDER/leptonica-1.73" &&
  chmod a+x ./configure &&
  LDFLAGS="$lept_ldflags" CPPFLAGS="$lept_cppflags" \
    ./configure --prefix="$BUILD_TOP/leptonica-1.73" --enable-static &&
  make &&
  make install

#Tesseract
# LIBLEPT_HEADERSDIR and --with-extra-libraries tell configure where the
# Leptonica headers and libraries built above are installed.
# NOTE(review): the GitHub source archive is expected to ship without a
# generated configure script; when it is missing, autogen.sh creates it
# (needs autoconf/automake and the libtool placed on PATH above) - confirm.
mkdir -p "$BUILD_TOP/tesseract-3.04.01"
cd "$SRC_TOP_FOLDER/tesseract-3.04.01" &&
  { [ -x ./configure ] || ./autogen.sh; } &&
  LIBLEPT_HEADERSDIR="$BUILD_TOP/leptonica-1.73/include" \
    ./configure --prefix="$BUILD_TOP/tesseract-3.04.01" --enable-static \
    --with-extra-libraries="$BUILD_TOP/leptonica-1.73/lib" &&
  make &&
  make install

#Tesseract traindata
# The trailing slash makes cp fail if the tessdata directory is missing,
# instead of creating a regular file with that name.
cp "$SRC_TOP_FOLDER/eng.traineddata" "$BUILD_TOP/tesseract-3.04.01/share/tessdata/"
cp "$SRC_TOP_FOLDER/jpn.traineddata" "$BUILD_TOP/tesseract-3.04.01/share/tessdata/"
cp "$SRC_TOP_FOLDER/osd.traineddata" "$BUILD_TOP/tesseract-3.04.01/share/tessdata/"

実行

# Runtime environment for the locally built Tesseract 3.04.01.
# Root of the per-package install prefixes created by the build step.
export OCR_TOP_DIR=/home/username/OCR/BuildFolder
# Directory that contains the tessdata/ folder with the *.traineddata files.
export TESSDATA_PREFIX="$OCR_TOP_DIR/tesseract-3.04.01/share"
export PATH="$PATH:$OCR_TOP_DIR/tesseract-3.04.01/bin"
# Append the previous LD_LIBRARY_PATH only when it is non-empty: a trailing ':'
# would add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$OCR_TOP_DIR/leptonica-1.73/lib:$OCR_TOP_DIR/tesseract-3.04.01/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Print the Tesseract version and the libraries it was built with.
"$OCR_TOP_DIR/tesseract-3.04.01/bin/tesseract" -v
# OCR test.jpg using the English data and write the result as test.pdf, timing the run.
time "$OCR_TOP_DIR/tesseract-3.04.01/bin/tesseract" test.jpg test -l eng pdf
		

参考資料

  • https://nesta-jp.appspot.com/tesssarect-ocr-install.html
  • http://www.neko.ne.jp/~freewing/raspberry_pi/raspberry_pi_3_ocr_tesseract/