From 6d4aa07bc76c57dd4d324645277f5cc63a9ec7d6 Mon Sep 17 00:00:00 2001
From: Liang Ding
Date: Wed, 18 Jan 2023 00:39:42 +0800
Subject: [PATCH] =?UTF-8?q?:art:=20OCR=20=E7=BB=93=E6=9E=9C=E5=89=94?=
 =?UTF-8?q?=E9=99=A4=E4=B8=8D=E5=8F=AF=E8=A7=81=E5=AD=97=E7=AC=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 kernel/util/tesseract.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go
index 2a54bba4f..bc73ee200 100644
--- a/kernel/util/tesseract.go
+++ b/kernel/util/tesseract.go
@@ -89,9 +89,7 @@ func Tesseract(imgAbsPath string) string {
 	}
 
 	ret := string(output)
-	ret = strings.ReplaceAll(ret, "\r", "")
-	ret = strings.ReplaceAll(ret, "\n", "")
-	ret = strings.ReplaceAll(ret, "\t", " ")
+	ret = gulu.Str.RemoveInvisible(ret)
 	reg := regexp.MustCompile("\\s{2,}")
 	ret = reg.ReplaceAllString(ret, " ")
 	msg := fmt.Sprintf("OCR [%s] [%s]", info.Name(), ret)