From 9788cce4b0e63cefbdb4ba8aa431db9fdc7dc763 Mon Sep 17 00:00:00 2001
From: Liang Ding
Date: Mon, 16 Jan 2023 16:16:34 +0800
Subject: [PATCH] =?UTF-8?q?:art:=20=E6=A1=8C=E9=9D=A2=E7=AB=AF=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81=E6=90=9C=E7=B4=A2=E5=9B=BE=E7=89=87=20OCR=20=E6=96=87?=
 =?UTF-8?q?=E6=9C=AC=20https://github.com/siyuan-note/siyuan/issues/3470?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 kernel/model/assets.go | 9 ++++++++-
 kernel/util/ocr.go     | 4 ++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/model/assets.go b/kernel/model/assets.go
index 61e363621..d3f820477 100644
--- a/kernel/model/assets.go
+++ b/kernel/model/assets.go
@@ -636,11 +636,18 @@ func UnusedAssets() (ret []string) {
 		}
 	}
 
-	// 排除文件注解和对应文件
 	var toRemoves []string
 	for asset, _ := range assetsPathMap {
 		if strings.HasSuffix(asset, ".sya") {
+			// 排除文件注解和对应文件
 			toRemoves = append(toRemoves, asset, strings.TrimSuffix(asset, ".sya"))
+			continue
+		}
+
+		if strings.HasSuffix(asset, "ocr-texts.json") {
+			// 排除 OCR 结果文本
+			toRemoves = append(toRemoves, asset)
+			continue
 		}
 	}
 	for _, toRemove := range toRemoves {
diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go
index b5d9f353f..0f3d8318b 100644
--- a/kernel/util/ocr.go
+++ b/kernel/util/ocr.go
@@ -259,6 +259,10 @@ func getTesseractVer() (ret string) {
 	cmd := exec.Command("tesseract", "--version")
 	gulu.CmdAttr(cmd)
 	data, err := cmd.CombinedOutput()
+	if nil != err {
+		logging.LogWarnf("tesseract-ocr not found: %s", err)
+	}
+	logging.LogWarnf("tesseract --version: %s", string(data))
 	if nil == err && strings.HasPrefix(string(data), "tesseract v") {
 		parts := bytes.Split(data, []byte("\n"))
 		if 0 < len(parts) {