mirror of
https://github.com/siyuan-note/siyuan.git
synced 2025-12-18 15:40:12 +01:00
Merge remote-tracking branch 'origin/dev' into dev
This commit is contained in:
commit
14e4f7bb5e
2 changed files with 64 additions and 23 deletions
|
|
@ -106,20 +106,34 @@ func NodeStaticContent(node *ast.Node, excludeTypes []string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
switch n.Type {
|
switch n.Type {
|
||||||
case ast.NodeLinkText:
|
case ast.NodeImage:
|
||||||
buf.Write(n.Tokens)
|
linkDest := n.ChildByType(ast.NodeLinkDest)
|
||||||
|
var linkDestStr, ocrText string
|
||||||
if nil != n.Parent && ast.NodeImage == n.Parent.Type {
|
if nil != linkDest {
|
||||||
destNode := n.Parent.ChildByType(ast.NodeLinkDest)
|
linkDestStr = linkDest.TokensStr()
|
||||||
if nil != destNode {
|
ocrText = util2.GetAssetText(linkDestStr)
|
||||||
// 桌面端支持搜索图片 OCR 文本 https://github.com/siyuan-note/siyuan/issues/3470
|
|
||||||
if text := util2.GetAssetText(destNode.TokensStr()); "" != text {
|
|
||||||
buf.WriteByte(' ')
|
|
||||||
buf.WriteString(text)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
linkText := n.ChildByType(ast.NodeLinkText)
|
||||||
|
if nil != linkText {
|
||||||
|
buf.Write(linkText.Tokens)
|
||||||
|
buf.WriteByte(' ')
|
||||||
|
}
|
||||||
|
if "" != ocrText {
|
||||||
|
buf.WriteString(ocrText)
|
||||||
|
buf.WriteByte(' ')
|
||||||
|
}
|
||||||
|
if nil != linkDest {
|
||||||
|
buf.Write(n.Tokens)
|
||||||
|
buf.WriteByte(' ')
|
||||||
|
|
||||||
|
}
|
||||||
|
if linkTitle := n.ChildByType(ast.NodeLinkTitle); nil != linkTitle {
|
||||||
|
buf.Write(linkTitle.Tokens)
|
||||||
|
}
|
||||||
|
return ast.WalkSkipChildren
|
||||||
|
case ast.NodeLinkText:
|
||||||
|
buf.Write(n.Tokens)
|
||||||
buf.WriteByte(' ')
|
buf.WriteByte(' ')
|
||||||
case ast.NodeLinkDest:
|
case ast.NodeLinkDest:
|
||||||
buf.Write(n.Tokens)
|
buf.Write(n.Tokens)
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ import (
|
||||||
|
|
||||||
"github.com/88250/gulu"
|
"github.com/88250/gulu"
|
||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
|
"github.com/panjf2000/ants/v2"
|
||||||
"github.com/siyuan-note/logging"
|
"github.com/siyuan-note/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -41,10 +42,22 @@ var (
|
||||||
assetsTextsChanged = false
|
assetsTextsChanged = false
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetAssetText(assets string) string {
|
func GetAssetText(asset string) string {
|
||||||
assetsTextsLock.Lock()
|
assetsTextsLock.Lock()
|
||||||
defer assetsTextsLock.Unlock()
|
ret, ok := assetsTexts[asset]
|
||||||
return assetsTexts[assets]
|
assetsTextsLock.Unlock()
|
||||||
|
if ok {
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
assetsPath := GetDataAssetsAbsPath()
|
||||||
|
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
||||||
|
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
||||||
|
ret = Tesseract(assetAbsPath)
|
||||||
|
assetsTextsLock.Lock()
|
||||||
|
assetsTexts[asset] = ret
|
||||||
|
assetsTextsLock.Unlock()
|
||||||
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func Tesseract(imgAbsPath string) string {
|
func Tesseract(imgAbsPath string) string {
|
||||||
|
|
@ -68,15 +81,11 @@ func Tesseract(imgAbsPath string) string {
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
if ctx.Err() == context.DeadlineExceeded {
|
if ctx.Err() == context.DeadlineExceeded {
|
||||||
logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
|
logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
|
||||||
assetsTexts[imgAbsPath] = ""
|
|
||||||
assetsTextsChanged = true
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
if nil != err {
|
if nil != err {
|
||||||
logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
|
logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
|
||||||
assetsTexts[imgAbsPath] = ""
|
|
||||||
assetsTextsChanged = true
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -87,8 +96,6 @@ func Tesseract(imgAbsPath string) string {
|
||||||
reg := regexp.MustCompile("\\s{2,}")
|
reg := regexp.MustCompile("\\s{2,}")
|
||||||
ret = reg.ReplaceAllString(ret, " ")
|
ret = reg.ReplaceAllString(ret, " ")
|
||||||
logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds())
|
logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds())
|
||||||
assetsTexts[imgAbsPath] = ret
|
|
||||||
assetsTextsChanged = true
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -98,10 +105,30 @@ func AutoOCRAssets() {
|
||||||
}
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
assetsPath := GetDataAssetsAbsPath()
|
||||||
assets := getUnOCRAssetsAbsPaths()
|
assets := getUnOCRAssetsAbsPaths()
|
||||||
for _, p := range assets {
|
|
||||||
Tesseract(p)
|
waitGroup := &sync.WaitGroup{}
|
||||||
|
lock := &sync.Mutex{}
|
||||||
|
p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) {
|
||||||
|
defer waitGroup.Done()
|
||||||
|
|
||||||
|
assetAbsPath := arg.(string)
|
||||||
|
text := Tesseract(assetAbsPath)
|
||||||
|
p := strings.TrimPrefix(assetAbsPath, assetsPath)
|
||||||
|
p = "assets" + filepath.ToSlash(p)
|
||||||
|
lock.Lock()
|
||||||
|
assetsTexts[p] = text
|
||||||
|
lock.Unlock()
|
||||||
|
assetsTextsChanged = true
|
||||||
|
})
|
||||||
|
for _, assetAbsPath := range assets {
|
||||||
|
waitGroup.Add(1)
|
||||||
|
p.Invoke(assetAbsPath)
|
||||||
}
|
}
|
||||||
|
waitGroup.Wait()
|
||||||
|
p.Release()
|
||||||
|
|
||||||
time.Sleep(7 * time.Second)
|
time.Sleep(7 * time.Second)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue