🎨 Tesseract OCR 加锁串行执行提升稳定性 Fix https://github.com/siyuan-note/siyuan/issues/7265

This commit is contained in:
Liang Ding 2023-02-06 17:39:49 +08:00
parent a3444452aa
commit 9404330f23
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
2 changed files with 8 additions and 22 deletions

View file

@ -1,21 +1,18 @@
package model package model
import ( import (
"github.com/siyuan-note/siyuan/kernel/task"
"io" "io"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"runtime/debug" "runtime/debug"
"strings" "strings"
"sync"
"time" "time"
"github.com/88250/gulu" "github.com/88250/gulu"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"github.com/panjf2000/ants/v2"
"github.com/siyuan-note/logging" "github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/cache" "github.com/siyuan-note/siyuan/kernel/cache"
"github.com/siyuan-note/siyuan/kernel/task"
"github.com/siyuan-note/siyuan/kernel/util" "github.com/siyuan-note/siyuan/kernel/util"
) )
@ -33,15 +30,7 @@ func autoOCRAssets() {
assetsPath := util.GetDataAssetsAbsPath() assetsPath := util.GetDataAssetsAbsPath()
assets := getUnOCRAssetsAbsPaths() assets := getUnOCRAssetsAbsPaths()
if 0 < len(assets) { if 0 < len(assets) {
poolSize := runtime.NumCPU() for i, assetAbsPath := range assets {
if 2 < poolSize {
poolSize = 2
}
waitGroup := &sync.WaitGroup{}
p, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) {
defer waitGroup.Done()
assetAbsPath := arg.(string)
text := util.Tesseract(assetAbsPath) text := util.Tesseract(assetAbsPath)
p := strings.TrimPrefix(assetAbsPath, assetsPath) p := strings.TrimPrefix(assetAbsPath, assetsPath)
p = "assets" + filepath.ToSlash(p) p = "assets" + filepath.ToSlash(p)
@ -49,19 +38,11 @@ func autoOCRAssets() {
util.AssetsTexts[p] = text util.AssetsTexts[p] = text
util.AssetsTextsLock.Unlock() util.AssetsTextsLock.Unlock()
util.AssetsTextsChanged = true util.AssetsTextsChanged = true
})
for i, assetAbsPath := range assets { if 16 <= i { // 一次任务中最多处理 16 张图片,防止卡顿
waitGroup.Add(1)
p.Invoke(assetAbsPath)
if 63 <= i { // 一次任务中最多处理 64 张图片,防止卡顿
break break
} }
} }
waitGroup.Wait()
p.Release()
} }
cleanNotExistAssetsTexts() cleanNotExistAssetsTexts()

View file

@ -64,12 +64,17 @@ func IsTesseractExtractable(p string) bool {
return strings.HasSuffix(lowerName, ".png") || strings.HasSuffix(lowerName, ".jpg") || strings.HasSuffix(lowerName, ".jpeg") return strings.HasSuffix(lowerName, ".png") || strings.HasSuffix(lowerName, ".jpg") || strings.HasSuffix(lowerName, ".jpeg")
} }
// tesseractOCRLock serializes Tesseract OCR invocations; running them one at
// a time improves stability. See https://github.com/siyuan-note/siyuan/issues/7265
var tesseractOCRLock sync.Mutex
func Tesseract(imgAbsPath string) string { func Tesseract(imgAbsPath string) string {
if ContainerStd != Container || !TesseractEnabled { if ContainerStd != Container || !TesseractEnabled {
return "" return ""
} }
defer logging.Recover() defer logging.Recover()
tesseractOCRLock.Lock()
defer tesseractOCRLock.Unlock()
if !IsTesseractExtractable(imgAbsPath) { if !IsTesseractExtractable(imgAbsPath) {
return "" return ""