From daa30de3c98d4a17a44783b03dd740e5906bf302 Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Mon, 16 Jan 2023 15:02:00 +0800 Subject: [PATCH] =?UTF-8?q?:art:=20=E6=A1=8C=E9=9D=A2=E7=AB=AF=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E6=90=9C=E7=B4=A2=E5=9B=BE=E7=89=87=20OCR=20=E6=96=87?= =?UTF-8?q?=E6=9C=AC=20https://github.com/siyuan-note/siyuan/issues/3470?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/util/ocr.go | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go index c0feba419..34791045b 100644 --- a/kernel/util/ocr.go +++ b/kernel/util/ocr.go @@ -31,6 +31,7 @@ import ( "github.com/88250/gulu" "github.com/dustin/go-humanize" + "github.com/panjf2000/ants/v2" "github.com/siyuan-note/logging" ) @@ -68,15 +69,11 @@ func Tesseract(imgAbsPath string) string { output, err := cmd.CombinedOutput() if ctx.Err() == context.DeadlineExceeded { logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size()) - assetsTexts[imgAbsPath] = "" - assetsTextsChanged = true return "" } if nil != err { logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err) - assetsTexts[imgAbsPath] = "" - assetsTextsChanged = true return "" } @@ -87,8 +84,6 @@ func Tesseract(imgAbsPath string) string { reg := regexp.MustCompile("\\s{2,}") ret = reg.ReplaceAllString(ret, " ") logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds()) - assetsTexts[imgAbsPath] = ret - assetsTextsChanged = true return ret } @@ -98,10 +93,30 @@ func AutoOCRAssets() { } for { + assetsPath := GetDataAssetsAbsPath() assets := getUnOCRAssetsAbsPaths() - for _, p := range assets { - Tesseract(p) + + waitGroup := &sync.WaitGroup{} + lock := &sync.Mutex{} + p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) { + defer waitGroup.Done() + + assetAbsPath := arg.(string) + text := Tesseract(assetAbsPath) + p := strings.TrimPrefix(assetAbsPath, assetsPath) + p = "assets" + filepath.ToSlash(p) + lock.Lock() + assetsTexts[p] = text + lock.Unlock() + assetsTextsChanged = true + }) + for _, assetAbsPath := range assets { + waitGroup.Add(1) + p.Invoke(assetAbsPath) } + waitGroup.Wait() + p.Release() + time.Sleep(7 * time.Second) } }