mirror of
https://github.com/siyuan-note/siyuan.git
synced 2025-12-17 07:00:12 +01:00
🎨 Tesseract OCR 加锁串行执行提升稳定性 Fix https://github.com/siyuan-note/siyuan/issues/7265
This commit is contained in:
parent
a3444452aa
commit
9404330f23
2 changed files with 8 additions and 22 deletions
|
|
@ -1,21 +1,18 @@
|
||||||
package model
|
package model
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/siyuan-note/siyuan/kernel/task"
|
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/88250/gulu"
|
"github.com/88250/gulu"
|
||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
"github.com/panjf2000/ants/v2"
|
|
||||||
"github.com/siyuan-note/logging"
|
"github.com/siyuan-note/logging"
|
||||||
"github.com/siyuan-note/siyuan/kernel/cache"
|
"github.com/siyuan-note/siyuan/kernel/cache"
|
||||||
|
"github.com/siyuan-note/siyuan/kernel/task"
|
||||||
"github.com/siyuan-note/siyuan/kernel/util"
|
"github.com/siyuan-note/siyuan/kernel/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -33,15 +30,7 @@ func autoOCRAssets() {
|
||||||
assetsPath := util.GetDataAssetsAbsPath()
|
assetsPath := util.GetDataAssetsAbsPath()
|
||||||
assets := getUnOCRAssetsAbsPaths()
|
assets := getUnOCRAssetsAbsPaths()
|
||||||
if 0 < len(assets) {
|
if 0 < len(assets) {
|
||||||
poolSize := runtime.NumCPU()
|
for i, assetAbsPath := range assets {
|
||||||
if 2 < poolSize {
|
|
||||||
poolSize = 2
|
|
||||||
}
|
|
||||||
waitGroup := &sync.WaitGroup{}
|
|
||||||
p, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) {
|
|
||||||
defer waitGroup.Done()
|
|
||||||
|
|
||||||
assetAbsPath := arg.(string)
|
|
||||||
text := util.Tesseract(assetAbsPath)
|
text := util.Tesseract(assetAbsPath)
|
||||||
p := strings.TrimPrefix(assetAbsPath, assetsPath)
|
p := strings.TrimPrefix(assetAbsPath, assetsPath)
|
||||||
p = "assets" + filepath.ToSlash(p)
|
p = "assets" + filepath.ToSlash(p)
|
||||||
|
|
@ -49,19 +38,11 @@ func autoOCRAssets() {
|
||||||
util.AssetsTexts[p] = text
|
util.AssetsTexts[p] = text
|
||||||
util.AssetsTextsLock.Unlock()
|
util.AssetsTextsLock.Unlock()
|
||||||
util.AssetsTextsChanged = true
|
util.AssetsTextsChanged = true
|
||||||
})
|
|
||||||
|
|
||||||
for i, assetAbsPath := range assets {
|
if 16 <= i { // 一次任务中最多处理 16 张图片,防止卡顿
|
||||||
waitGroup.Add(1)
|
|
||||||
p.Invoke(assetAbsPath)
|
|
||||||
|
|
||||||
if 63 <= i { // 一次任务中最多处理 64 张图片,防止卡顿
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
waitGroup.Wait()
|
|
||||||
p.Release()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanNotExistAssetsTexts()
|
cleanNotExistAssetsTexts()
|
||||||
|
|
|
||||||
|
|
@ -64,12 +64,17 @@ func IsTesseractExtractable(p string) bool {
|
||||||
return strings.HasSuffix(lowerName, ".png") || strings.HasSuffix(lowerName, ".jpg") || strings.HasSuffix(lowerName, ".jpeg")
|
return strings.HasSuffix(lowerName, ".png") || strings.HasSuffix(lowerName, ".jpg") || strings.HasSuffix(lowerName, ".jpeg")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tesseractOCRLock serializes Tesseract OCR so that only one recognition runs at a time, which improves stability. See https://github.com/siyuan-note/siyuan/issues/7265
|
||||||
|
var tesseractOCRLock = sync.Mutex{}
|
||||||
|
|
||||||
func Tesseract(imgAbsPath string) string {
|
func Tesseract(imgAbsPath string) string {
|
||||||
if ContainerStd != Container || !TesseractEnabled {
|
if ContainerStd != Container || !TesseractEnabled {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
defer logging.Recover()
|
defer logging.Recover()
|
||||||
|
tesseractOCRLock.Lock()
|
||||||
|
defer tesseractOCRLock.Unlock()
|
||||||
|
|
||||||
if !IsTesseractExtractable(imgAbsPath) {
|
if !IsTesseractExtractable(imgAbsPath) {
|
||||||
return ""
|
return ""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue