tesseract: add package

Tesseract is an open source optical character recognition (OCR) engine, available under the Apache 2.0 license. It can be used directly from the command line or, for programmers, through an API to extract printed text from images. It supports a wide variety of languages.

Signed-off-by: Valentín Kivachuk <vk18496@gmail.com>
This commit is contained in:
Valentín Kivachuk 2019-07-14 17:45:45 +02:00
parent cfb5dbb4aa
commit 9c8e7c6f52
2 changed files with 117 additions and 0 deletions

60
utils/tessdata/Makefile Normal file
View File

@ -0,0 +1,60 @@
# Copyright (C) 2019 Valentín Kivachuk <vk18496@gmail.com>
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
#
include $(TOPDIR)/rules.mk

# Package identity.
PKG_NAME:=tessdata
PKG_VERSION:=4.0.0
PKG_RELEASE:=1

# Maintainer and licensing.
PKG_MAINTAINER:=Valentín Kivachuk <vk18496@gmail.com>
PKG_LICENSE:=Apache-2.0
PKG_LICENSE_FILES:=COPYING

# Upstream tarball, fetched from GitHub's codeload endpoint.
# NOTE(review): the trailing '?' presumably turns whatever the download
# helper appends to the URL into an ignored query string - confirm.
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
PKG_SOURCE_URL:=https://codeload.github.com/tesseract-ocr/$(PKG_NAME)/tar.gz/$(PKG_VERSION)?
PKG_HASH:=38c637d3a1763f6c3d32e8f1d979f045668676ec5feb8ee1869ee77cedd31b08

PKG_INSTALL:=1

# The archive is about 1.5 GB, so the unpack step is left empty; the
# per-language install recipes pull single files straight out of the tarball.
PKG_UNPACK:=

include $(INCLUDE_DIR)/package.mk
# Every language/script code for which a tesseract-data-<code> package is
# generated. Each code is also the base name of the <code>.traineddata file
# extracted from the tarball. Entries are grouped by initial letter.
ALLTESSERACTLANG:= \
	afr amh ara asm aze aze_cyrl \
	bel ben bod bos bre bul \
	cat ceb ces chi_sim chi_sim_vert chi_tra chi_tra_vert chr cos cym \
	dan dan_frak deu deu_frak div dzo \
	ell eng enm epo equ est eus \
	fao fas fil fin fra frk frm fry \
	gla gle glg grc guj \
	hat heb hin hrv hun hye \
	iku ind isl ita ita_old \
	jav jpn jpn_vert \
	kan kat kat_old kaz khm kir kor kor_vert kur kur_ara \
	lao lat lav lit ltz \
	mal mar mkd mlt mon mri msa mya \
	nep nld nor \
	oci ori osd \
	pan pol por pus \
	que \
	ron rus \
	san sin slk slk_frak slv snd spa spa_old sqi srp srp_latn sun swa swe syr \
	tam tat tel tgk tgl tha tir ton tur \
	uig ukr urd uzb uzb_cyrl \
	vie \
	yid yor
# Intentionally empty: this package only ships pre-trained data files, so
# there is nothing to compile.
# NOTE(review): presumably this replaces the default compile step supplied by
# package.mk - confirm.
define Build/Compile
endef
# Intentionally empty: no install step is run into a staging tree, because the
# tarball is never unpacked (PKG_UNPACK is empty) and each package's install
# recipe extracts its file directly from the downloaded archive.
define Build/Install
endef
# Metadata shared by every tesseract-data-<lang> package; it is spliced into
# each generated Package/tesseract-data-<lang> definition via $(call ...).
#
# DEPENDS names tesseract without a leading '+': the data packages require
# tesseract but do not select it automatically.
# PKGARCH is 'all' because the .traineddata files are plain data and do not
# depend on the target architecture.
define Package/tesseract-data-default
  SUBMENU:=Tesseract
  SECTION:=utils
  CATEGORY:=Utilities
  DEPENDS:=tesseract
  PKGARCH:=all
endef
# Template that declares one data package for a single language.
#
# $(1) - language code (one word of ALLTESSERACTLANG), substituted when the
#        template is expanded with $(call ...).
#
# The template is meant to be passed through $(eval ...). Inside it, '$$(1)'
# is escaped so that it survives the $(call ...) as a literal '$(1)' and later
# stands for the destination root handed to the install recipe.
#
# The install recipe extracts only <lang>.traineddata from the downloaded
# tarball (the archive itself is never unpacked) and drops the archive's
# top-level directory so the file lands directly in /usr/share/tessdata.
define generate-tesseract-data-package
define Package/tesseract-data-$(1)
  TITLE:=Tesseract training data for $(1) language
  $(call Package/tesseract-data-default)
endef

define Package/tesseract-data-$(1)/install
	$(INSTALL_DIR) $$(1)/usr/share/tessdata
	$(TAR) --strip-components=1 -C $$(1)/usr/share/tessdata/ -xf $(DL_DIR)/$(PKG_SOURCE) $(PKG_NAME)-$(PKG_VERSION)/$(1).traineddata
endef
endef
# Pass 1: declare Package/tesseract-data-<code> and its install recipe for
# every language code.
$(foreach tess_lang,$(ALLTESSERACTLANG),$(eval $(call generate-tesseract-data-package,$(tess_lang))))
# Pass 2: hand each declared package to BuildPackage, in list order.
$(foreach tess_lang,$(ALLTESSERACTLANG),$(eval $(call BuildPackage,tesseract-data-$(tess_lang))))

57
utils/tesseract/Makefile Normal file
View File

@ -0,0 +1,57 @@
# Copyright (C) 2019 Valentin Kivachuk <vk18496@gmail.com>
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
#
include $(TOPDIR)/rules.mk

# Package identity.
PKG_NAME:=tesseract
PKG_VERSION:=4.0.0
PKG_RELEASE:=1

# Maintainer and licensing.
PKG_MAINTAINER:=Valentin Kivachuk <vk18496@gmail.com>
PKG_LICENSE:=Apache-2.0
PKG_LICENSE_FILES:=LICENSE

# Upstream tarball, fetched from GitHub's codeload endpoint.
# NOTE(review): the trailing '?' presumably turns whatever the download
# helper appends to the URL into an ignored query string - confirm.
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
PKG_SOURCE_URL:=https://codeload.github.com/tesseract-ocr/$(PKG_NAME)/tar.gz/$(PKG_VERSION)?
PKG_HASH:=a1f5422ca49a32e5f35c54dee5112b11b99928fc9f4ee6695cdc6768d69f61dd

# Build behaviour: regenerate the autotools files, install into
# PKG_INSTALL_DIR, and allow parallel make jobs.
PKG_FIXUP:=autoreconf
PKG_INSTALL:=1
PKG_BUILD_PARALLEL:=1

include $(INCLUDE_DIR)/package.mk

# Let the linker resolve indirect shared-library dependencies from the
# staging tree.
TARGET_LDFLAGS += -Wl,-rpath-link=$(STAGING_DIR)/usr/lib
# menuconfig / package-manager metadata for the OCR engine itself.
# MENU:=1 gives the entry its own sub-menu.
# NOTE(review): presumably this is where the tesseract-data-* packages, which
# depend on this package, are listed - confirm in menuconfig.
define Package/tesseract
  MENU:=1
  SECTION:=utils
  CATEGORY:=Utilities
  TITLE:=Tesseract Open Source OCR Engine
  URL:=https://github.com/tesseract-ocr/tesseract
  DEPENDS:=+libleptonica +libpthread +libstdcpp
endef

# Long description shown as the package help text.
define Package/tesseract/description
  Tesseract is an open source text recognition (OCR) engine, available
  under the Apache 2.0 license. It can be used directly, or through an
  API, to extract printed text from images. It supports a wide variety
  of languages.
endef
# Force -O3: first drop any optimisation level already present in the
# target flags, then append the one wanted here.
TARGET_CFLAGS:=$(filter-out -O%,$(TARGET_CFLAGS))
TARGET_CFLAGS+=-O3
# Publish the development files (headers, static and shared libraries,
# pkg-config file) from PKG_INSTALL_DIR into the tree given as $(1), so other
# packages can build against libtesseract.
#
# $(1) - root of the destination tree.
define Build/InstallDev
	$(INSTALL_DIR) $(1)/usr/include $(1)/usr/lib $(1)/usr/lib/pkgconfig
	$(CP) $(PKG_INSTALL_DIR)/usr/include/tesseract $(1)/usr/include/
	$(CP) $(PKG_INSTALL_DIR)/usr/lib/libtesseract.{a,so*} $(1)/usr/lib/
	$(CP) $(PKG_INSTALL_DIR)/usr/lib/pkgconfig/tesseract.pc $(1)/usr/lib/pkgconfig/
endef
# Populate the package root given as $(1) with the runtime files only.
#
# $(1) - root of the package's file tree.
#
# - Executables are installed with $(INSTALL_BIN) so they always carry
#   executable permissions.
# - Only the versioned libtesseract shared objects are shipped, using the same
#   library name as Build/InstallDev.
# - From usr/share only the tessdata directory is copied, so nothing else
#   placed under usr/share by 'make install' ends up on the device.
#   NOTE(review): the upstream install is expected to put only tessdata and
#   optional man pages there - confirm against PKG_INSTALL_DIR.
define Package/tesseract/install
	$(INSTALL_DIR) $(1)/usr/bin
	$(INSTALL_BIN) $(PKG_INSTALL_DIR)/usr/bin/* $(1)/usr/bin/
	$(INSTALL_DIR) $(1)/usr/lib
	$(CP) $(PKG_INSTALL_DIR)/usr/lib/libtesseract.so.* $(1)/usr/lib/
	$(INSTALL_DIR) $(1)/usr/share
	$(CP) $(PKG_INSTALL_DIR)/usr/share/tessdata $(1)/usr/share/
endef
# Register the package declared above; PKG_NAME is "tesseract".
$(eval $(call BuildPackage,$(PKG_NAME)))