🎨 桌面端支持搜索图片 OCR 文本 https://github.com/siyuan-note/siyuan/issues/3470

This commit is contained in:
Liang Ding 2023-01-16 15:02:00 +08:00
parent ff91a58747
commit daa30de3c9
No known key found for this signature in database
GPG key ID: 136F30F901A2231D

View file

@@ -31,6 +31,7 @@ import (
"github.com/88250/gulu"
"github.com/dustin/go-humanize"
"github.com/panjf2000/ants/v2"
"github.com/siyuan-note/logging"
)
@@ -68,15 +69,11 @@ func Tesseract(imgAbsPath string) string {
output, err := cmd.CombinedOutput()
if ctx.Err() == context.DeadlineExceeded {
logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
assetsTexts[imgAbsPath] = ""
assetsTextsChanged = true
return ""
}
if nil != err {
logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
assetsTexts[imgAbsPath] = ""
assetsTextsChanged = true
return ""
}
@@ -87,8 +84,6 @@ func Tesseract(imgAbsPath string) string {
reg := regexp.MustCompile("\\s{2,}")
ret = reg.ReplaceAllString(ret, " ")
logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds())
assetsTexts[imgAbsPath] = ret
assetsTextsChanged = true
return ret
}
@@ -98,10 +93,30 @@ func AutoOCRAssets() {
}
for {
assetsPath := GetDataAssetsAbsPath()
assets := getUnOCRAssetsAbsPaths()
for _, p := range assets {
Tesseract(p)
waitGroup := &sync.WaitGroup{}
lock := &sync.Mutex{}
p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) {
defer waitGroup.Done()
assetAbsPath := arg.(string)
text := Tesseract(assetAbsPath)
p := strings.TrimPrefix(assetAbsPath, assetsPath)
p = "assets" + filepath.ToSlash(p)
lock.Lock()
assetsTexts[p] = text
lock.Unlock()
assetsTextsChanged = true
})
for _, assetAbsPath := range assets {
waitGroup.Add(1)
p.Invoke(assetAbsPath)
}
waitGroup.Wait()
p.Release()
time.Sleep(7 * time.Second)
}
}