From 958c570eb996087ebc1413cd7725da684358cb9a Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Wed, 18 Jan 2023 00:23:17 +0800 Subject: [PATCH 1/3] =?UTF-8?q?:bug:=20macOS=20=E7=AB=AF=20Tesseract=20OCR?= =?UTF-8?q?=20=E5=AE=89=E8=A3=85=E5=90=8E=E4=B8=8D=E8=AF=86=E5=88=AB=20htt?= =?UTF-8?q?ps://github.com/siyuan-note/siyuan/issues/7107?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/util/tesseract.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go index 6f4d9ab00..333787874 100644 --- a/kernel/util/tesseract.go +++ b/kernel/util/tesseract.go @@ -156,8 +156,8 @@ func getTesseractVer() (ret string) { if nil != err { return } - logging.LogInfof("tesseract version output [%s]", string(data)) - if nil == err && strings.HasPrefix(string(data), "tesseract ") { + + if strings.HasPrefix(string(data), "tesseract ") { parts := bytes.Split(data, []byte("\n")) if 0 < len(parts) { ret = strings.TrimPrefix(string(parts[0]), "tesseract ") From ca2a27c9648166cb97ca543129561dacda39c030 Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Wed, 18 Jan 2023 00:27:11 +0800 Subject: [PATCH 2/3] =?UTF-8?q?:bug:=20macOS=20=E7=AB=AF=20Tesseract=20OCR?= =?UTF-8?q?=20=E5=AE=89=E8=A3=85=E5=90=8E=E4=B8=8D=E8=AF=86=E5=88=AB=20htt?= =?UTF-8?q?ps://github.com/siyuan-note/siyuan/issues/7107?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/util/tesseract.go | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go index 333787874..2a54bba4f 100644 --- a/kernel/util/tesseract.go +++ b/kernel/util/tesseract.go @@ -147,6 +147,7 @@ func getTesseractVer() (ret string) { data, err := cmd.CombinedOutput() if nil != err { if strings.Contains(err.Error(), "executable file not found") { + // macOS 端 Tesseract OCR 安装后不识别 https://github.com/siyuan-note/siyuan/issues/7107 TesseractBin = "/usr/local/bin/tesseract" cmd = exec.Command(TesseractBin, "--version") gulu.CmdAttr(cmd) From 6d4aa07bc76c57dd4d324645277f5cc63a9ec7d6 Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Wed, 18 Jan 2023 00:39:42 +0800 Subject: [PATCH 3/3] =?UTF-8?q?:art:=20OCR=20=E7=BB=93=E6=9E=9C=E5=89=94?= =?UTF-8?q?=E9=99=A4=E4=B8=8D=E5=8F=AF=E8=A7=81=E5=AD=97=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/util/tesseract.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go index 2a54bba4f..bc73ee200 100644 --- a/kernel/util/tesseract.go +++ b/kernel/util/tesseract.go @@ -89,9 +89,7 @@ func Tesseract(imgAbsPath string) string { } ret := string(output) - ret = strings.ReplaceAll(ret, "\r", "") - ret = strings.ReplaceAll(ret, "\n", "") - ret = strings.ReplaceAll(ret, "\t", " ") + ret = gulu.Str.RemoveInvisible(ret) reg := regexp.MustCompile("\\s{2,}") ret = reg.ReplaceAllString(ret, " ") msg := fmt.Sprintf("OCR [%s] [%s]", info.Name(), ret)