From d9b8d4fdedfd0de9bf2cda017b5260cfc1e350d4 Mon Sep 17 00:00:00 2001
From: Liang Ding
Date: Thu, 2 Feb 2023 23:53:34 +0800
Subject: [PATCH] =?UTF-8?q?:art:=20Tesseract=20OCR=20=E8=AF=AD=E8=A8=80?=
 =?UTF-8?q?=E5=8C=85=E8=BF=87=E6=BB=A4=20Fix=20https://github.com/siyuan-n?=
 =?UTF-8?q?ote/siyuan/issues/7242?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 kernel/util/tesseract.go | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go
index 94decdff7..32057f909 100644
--- a/kernel/util/tesseract.go
+++ b/kernel/util/tesseract.go
@@ -119,12 +119,28 @@ func initTesseract() {
 		return
 	}
 
-	for _, lang := range langs {
-		TesseractLangs = append(TesseractLangs, lang)
-	}
+	TesseractLangs = filterTesseractLangs(langs)
 	logging.LogInfof("tesseract-ocr enabled [ver=%s, langs=%s]", ver, strings.Join(TesseractLangs, "+"))
 }
 
+// filterTesseractLangs returns the entries of langs that are kept for OCR:
+// eng, fra, spa, deu, rus, osd and every name that starts with "chi".
+// The input order is preserved and the result is never nil.
+func filterTesseractLangs(langs []string) (ret []string) {
+	ret = []string{}
+	for _, name := range langs {
+		keep := strings.HasPrefix(name, "chi")
+		switch name {
+		case "eng", "fra", "spa", "deu", "rus", "osd":
+			keep = true
+		}
+		if keep {
+			ret = append(ret, name)
+		}
+	}
+	return ret
+}
+
 func getTesseractVer() (ret string) {
 	if ContainerStd != Container {
 		return