最終更新日:
Tesseract についての簡単な紹介。
環境設定
ファイルダウンロード
# Download and unpack the sources needed to build Tesseract 3.04.01 by hand.
# Save as a script and run it with bash from the directory that will hold the
# sources; every archive is downloaded to and extracted in the current
# directory.
#
# Version list from the original notes:
# Ubuntu 16.04 LTS : Tesseract 3.04.01, Leptonica 1.73, libgif 5.1.2, libjpeg 8d, libpng 1.2.54, libtiff 4.0.6, zlib 1.2.8, libwebp 0.4.4, libopenjp2 2.1.2
# Raspbian stretch : Tesseract 3.04.01, Leptonica 1.74.1, libgif 5.1.4, libjpeg 6b, libpng 1.6.28, libtiff 4.0.8, zlib 1.2.8, libwebp 0.5.2, libopenjp2 2.1.2
# NOTE(review): the libjpeg tarball fetched below is 6b, not the 8d listed for
# Ubuntu -- confirm which one is intended.

# Stop at the first failed download or extraction instead of carrying on with
# a missing or truncated archive.
set -euo pipefail

#--------------------------------------------------------
# Ubuntu 16.04 LTS
#--------------------------------------------------------

# libtool
wget https://ftp.gnu.org/gnu/libtool/libtool-2.4.6.tar.gz
tar zxvf libtool-2.4.6.tar.gz

# zlib (-O gives the GitHub archive a predictable file name)
wget https://github.com/madler/zlib/archive/v1.2.8.tar.gz -O zlib-1.2.8.tar.gz
tar zxvf zlib-1.2.8.tar.gz

# libpng
wget https://sourceforge.net/projects/libpng/files/libpng12/older-releases/1.2.54/libpng-1.2.54.tar.gz/download -O libpng-1.2.54.tar.gz
tar zxvf libpng-1.2.54.tar.gz

# libjpeg
wget http://www.ijg.org/files/jpegsrc.v6b.tar.gz
tar zxvf jpegsrc.v6b.tar.gz

# libgif
wget https://sourceforge.net/projects/giflib/files/giflib-5.1.2.tar.gz/download -O giflib-5.1.2.tar.gz
tar zxvf giflib-5.1.2.tar.gz

# libtiff
wget http://download.osgeo.org/libtiff/tiff-4.0.6.tar.gz
tar zxvf tiff-4.0.6.tar.gz

# Leptonica
wget https://github.com/DanBloomberg/leptonica/archive/v1.73.tar.gz -O leptonica-1.73.tar.gz
tar zxvf leptonica-1.73.tar.gz

# Tesseract
wget https://github.com/tesseract-ocr/tesseract/archive/3.04.01.tar.gz -O tesseract-3.04.01.tar.gz
tar zxvf tesseract-3.04.01.tar.gz

#--------------------------------------------------------
# Tesseract Training Data for 3.04.xx
#--------------------------------------------------------
wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/eng.traineddata
wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/jpn.traineddata
wget https://github.com/tesseract-ocr/tessdata/raw/3.04.00/osd.traineddata
ビルド
# Build libtool, zlib, libpng, libjpeg, giflib, libtiff, Leptonica and
# Tesseract from the unpacked sources under $SRC_TOP_FOLDER, installing each
# package into its own prefix under $SRC_TOP_FOLDER/BuildFolder.
# Save as a script and run it with bash after editing SRC_TOP_FOLDER.
#
# Version list from the original notes:
# Ubuntu 16.04 LTS : Tesseract 3.04.01, Leptonica 1.73, libgif 5.1.2, libjpeg 8d, libpng 1.2.54, libtiff 4.0.6, zlib 1.2.8, libwebp 0.4.4, libopenjp2 2.1.2
# Raspbian stretch : Tesseract 3.04.01, Leptonica 1.74.1, libgif 5.1.4, libjpeg 6b, libpng 1.6.28, libtiff 4.0.8, zlib 1.2.8, libwebp 0.5.2, libopenjp2 2.1.2
# NOTE(review): this script builds jpeg-6b, not the libjpeg 8d listed for
# Ubuntu -- confirm which one is intended.

# Each step depends on the previous one, so abort on the first failure.
set -euo pipefail

#--------------------------------------------------------
# Ubuntu 16.04 LTS
#--------------------------------------------------------
export SRC_TOP_FOLDER=/home/username/OCR
build_dir="$SRC_TOP_FOLDER/BuildFolder"

# Put the locally built libtool first on PATH.
export PATH="$build_dir/libtool-2.4.6/bin:$PATH"

# -p keeps every mkdir in this script from failing when it is re-run.
mkdir -p "$build_dir"
cd "$SRC_TOP_FOLDER"

# libtool
mkdir -p "$build_dir/libtool-2.4.6"
cd "$SRC_TOP_FOLDER/libtool-2.4.6"
./configure --prefix="$build_dir/libtool-2.4.6"
make
make install

# zlib
mkdir -p "$build_dir/zlib-1.2.8"
cd "$SRC_TOP_FOLDER/zlib-1.2.8"
CFLAGS="-fPIC" ./configure --prefix="$build_dir/zlib-1.2.8" --static
make
make install

# libpng
# From here on the source directories are entered by absolute path: a relative
# "cd libpng-1.2.54" would be resolved inside the previous package's source
# directory and fail.
mkdir -p "$build_dir/libpng-1.2.54"
cd "$SRC_TOP_FOLDER/libpng-1.2.54"
./configure --prefix="$build_dir/libpng-1.2.54" --enable-static
make
make install

# libjpeg
# The install sub-directories are created up front, as in the original notes.
mkdir -p \
  "$build_dir/jpeg-6b/bin" \
  "$build_dir/jpeg-6b/lib" \
  "$build_dir/jpeg-6b/include" \
  "$build_dir/jpeg-6b/man/man1"
cd "$SRC_TOP_FOLDER/jpeg-6b"
CFLAGS="-fPIC" ./configure --prefix="$build_dir/jpeg-6b"
make
make install
make install-lib
make install-headers

# libgif
mkdir -p "$build_dir/giflib-5.1.2"
cd "$SRC_TOP_FOLDER/giflib-5.1.2"
./configure --prefix="$build_dir/giflib-5.1.2" --enable-static
make
make install

# libtiff
mkdir -p "$build_dir/tiff-4.0.6"
cd "$SRC_TOP_FOLDER/tiff-4.0.6"
./configure --prefix="$build_dir/tiff-4.0.6" --enable-static
make
make install

# Leptonica
mkdir -p "$build_dir/leptonica-1.73"
cd "$SRC_TOP_FOLDER/leptonica-1.73"
chmod a+x ./configure
# Disabled alternative kept from the original notes:
#./configure --prefix=$SRC_TOP_FOLDER/BuildFolder/leptonica-1.73 --enable-static
# Pass the library and header directories of the packages built above.
LDFLAGS="-L$build_dir/tiff-4.0.6/lib -L$build_dir/giflib-5.1.2/lib -L$build_dir/jpeg-6b/lib -L$build_dir/libpng-1.2.54/lib -L$build_dir/zlib-1.2.8/lib -L$build_dir/libtool-2.4.6/lib" \
CFLAGS="-I$build_dir/tiff-4.0.6/include -I$build_dir/giflib-5.1.2/include -I$build_dir/jpeg-6b/include -I$build_dir/libpng-1.2.54/include -I$build_dir/zlib-1.2.8/include -I$build_dir/libtool-2.4.6/include" \
  ./configure --prefix="$build_dir/leptonica-1.73" --enable-static
make
make install

# Tesseract
mkdir -p "$build_dir/tesseract-3.04.01"
cd "$SRC_TOP_FOLDER/tesseract-3.04.01"
LIBLEPT_HEADERSDIR="$build_dir/leptonica-1.73/include" \
  ./configure \
    --prefix="$build_dir/tesseract-3.04.01" \
    --enable-static \
    --with-extra-libraries="$build_dir/leptonica-1.73/lib"
make
make install

# Tesseract traindata
# Create the target directory first so cp cannot write a plain file named
# "tessdata" if the install step did not create it.
mkdir -p "$build_dir/tesseract-3.04.01/share/tessdata"
cp -- \
  "$SRC_TOP_FOLDER/eng.traineddata" \
  "$SRC_TOP_FOLDER/jpn.traineddata" \
  "$SRC_TOP_FOLDER/osd.traineddata" \
  "$build_dir/tesseract-3.04.01/share/tessdata/"
実行
# Set up the environment for the locally built Tesseract and run it on
# test.jpg. Edit OCR_TOP_DIR to match the install location.
export OCR_TOP_DIR=/home/username/OCR/BuildFolder

# NOTE(review): assumes Tesseract 3.04 wants the directory that contains
# tessdata/, not tessdata/ itself -- confirm.
export TESSDATA_PREFIX="$OCR_TOP_DIR/tesseract-3.04.01/share"
export PATH="$PATH:$OCR_TOP_DIR/tesseract-3.04.01/bin"

# Append the previous LD_LIBRARY_PATH only when it is non-empty; a trailing
# ":" would add an empty entry, which the dynamic loader treats as the current
# directory.
export LD_LIBRARY_PATH="$OCR_TOP_DIR/leptonica-1.73/lib:$OCR_TOP_DIR/tesseract-3.04.01/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Print the version information.
"$OCR_TOP_DIR/tesseract-3.04.01/bin/tesseract" -v

# Time an English OCR run on test.jpg, using output base name "test" and the
# "pdf" config.
time "$OCR_TOP_DIR/tesseract-3.04.01/bin/tesseract" test.jpg test -l eng pdf
参考資料
- https://nesta-jp.appspot.com/tesssarect-ocr-install.html
- http://www.neko.ne.jp/~freewing/raspberry_pi/raspberry_pi_3_ocr_tesseract/