Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
Vanessa 2023-01-16 16:02:08 +08:00
commit 14e4f7bb5e
2 changed files with 64 additions and 23 deletions

View file

@ -106,20 +106,34 @@ func NodeStaticContent(node *ast.Node, excludeTypes []string) string {
} }
switch n.Type { switch n.Type {
case ast.NodeLinkText: case ast.NodeImage:
buf.Write(n.Tokens) linkDest := n.ChildByType(ast.NodeLinkDest)
var linkDestStr, ocrText string
if nil != n.Parent && ast.NodeImage == n.Parent.Type { if nil != linkDest {
destNode := n.Parent.ChildByType(ast.NodeLinkDest) linkDestStr = linkDest.TokensStr()
if nil != destNode { ocrText = util2.GetAssetText(linkDestStr)
// 桌面端支持搜索图片 OCR 文本 https://github.com/siyuan-note/siyuan/issues/3470
if text := util2.GetAssetText(destNode.TokensStr()); "" != text {
buf.WriteByte(' ')
buf.WriteString(text)
}
}
} }
linkText := n.ChildByType(ast.NodeLinkText)
if nil != linkText {
buf.Write(linkText.Tokens)
buf.WriteByte(' ')
}
if "" != ocrText {
buf.WriteString(ocrText)
buf.WriteByte(' ')
}
if nil != linkDest {
buf.Write(n.Tokens)
buf.WriteByte(' ')
}
if linkTitle := n.ChildByType(ast.NodeLinkTitle); nil != linkTitle {
buf.Write(linkTitle.Tokens)
}
return ast.WalkSkipChildren
case ast.NodeLinkText:
buf.Write(n.Tokens)
buf.WriteByte(' ') buf.WriteByte(' ')
case ast.NodeLinkDest: case ast.NodeLinkDest:
buf.Write(n.Tokens) buf.Write(n.Tokens)

View file

@ -31,6 +31,7 @@ import (
"github.com/88250/gulu" "github.com/88250/gulu"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"github.com/panjf2000/ants/v2"
"github.com/siyuan-note/logging" "github.com/siyuan-note/logging"
) )
@ -41,10 +42,22 @@ var (
assetsTextsChanged = false assetsTextsChanged = false
) )
func GetAssetText(assets string) string { func GetAssetText(asset string) string {
assetsTextsLock.Lock() assetsTextsLock.Lock()
defer assetsTextsLock.Unlock() ret, ok := assetsTexts[asset]
return assetsTexts[assets] assetsTextsLock.Unlock()
if ok {
return ret
}
assetsPath := GetDataAssetsAbsPath()
assetAbsPath := strings.TrimPrefix(asset, "assets")
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
ret = Tesseract(assetAbsPath)
assetsTextsLock.Lock()
assetsTexts[asset] = ret
assetsTextsLock.Unlock()
return ret
} }
func Tesseract(imgAbsPath string) string { func Tesseract(imgAbsPath string) string {
@ -68,15 +81,11 @@ func Tesseract(imgAbsPath string) string {
output, err := cmd.CombinedOutput() output, err := cmd.CombinedOutput()
if ctx.Err() == context.DeadlineExceeded { if ctx.Err() == context.DeadlineExceeded {
logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size()) logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
assetsTexts[imgAbsPath] = ""
assetsTextsChanged = true
return "" return ""
} }
if nil != err { if nil != err {
logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err) logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
assetsTexts[imgAbsPath] = ""
assetsTextsChanged = true
return "" return ""
} }
@ -87,8 +96,6 @@ func Tesseract(imgAbsPath string) string {
reg := regexp.MustCompile("\\s{2,}") reg := regexp.MustCompile("\\s{2,}")
ret = reg.ReplaceAllString(ret, " ") ret = reg.ReplaceAllString(ret, " ")
logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds()) logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds())
assetsTexts[imgAbsPath] = ret
assetsTextsChanged = true
return ret return ret
} }
@ -98,10 +105,30 @@ func AutoOCRAssets() {
} }
for { for {
assetsPath := GetDataAssetsAbsPath()
assets := getUnOCRAssetsAbsPaths() assets := getUnOCRAssetsAbsPaths()
for _, p := range assets {
Tesseract(p) waitGroup := &sync.WaitGroup{}
lock := &sync.Mutex{}
p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) {
defer waitGroup.Done()
assetAbsPath := arg.(string)
text := Tesseract(assetAbsPath)
p := strings.TrimPrefix(assetAbsPath, assetsPath)
p = "assets" + filepath.ToSlash(p)
lock.Lock()
assetsTexts[p] = text
lock.Unlock()
assetsTextsChanged = true
})
for _, assetAbsPath := range assets {
waitGroup.Add(1)
p.Invoke(assetAbsPath)
} }
waitGroup.Wait()
p.Release()
time.Sleep(7 * time.Second) time.Sleep(7 * time.Second)
} }
} }