diff --git a/kernel/treenode/node.go b/kernel/treenode/node.go
index f3f9ef46f..2be608175 100644
--- a/kernel/treenode/node.go
+++ b/kernel/treenode/node.go
@@ -18,10 +18,11 @@ package treenode
 
 import (
 	"bytes"
-	"github.com/88250/gulu"
+	"path/filepath"
 	"strings"
 	"sync"
 
+	"github.com/88250/gulu"
 	"github.com/88250/lute"
 	"github.com/88250/lute/ast"
 	"github.com/88250/lute/editor"
@@ -31,6 +32,7 @@ import (
 	"github.com/88250/lute/render"
 	"github.com/88250/lute/util"
 	"github.com/siyuan-note/logging"
+	util2 "github.com/siyuan-note/siyuan/kernel/util"
 )
 
 func GetBlockRef(n *ast.Node) (blockRefID, blockRefText, blockRefSubtype string) {
@@ -107,6 +109,19 @@ func NodeStaticContent(node *ast.Node, excludeTypes []string) string {
 		switch n.Type {
 		case ast.NodeLinkText:
 			buf.Write(n.Tokens)
+
+			if nil != n.Parent && ast.NodeImage == n.Parent.Type {
+				destNode := n.Parent.ChildByType(ast.NodeLinkDest)
+				if nil != destNode {
+					// Desktop: support searching text inside images https://github.com/siyuan-note/siyuan/issues/3470
+					// Try to OCR the text in the image and append it to the image's alt text
+					if text := util2.Tesseract(filepath.Join(util2.DataDir, destNode.TokensStr())); "" != text {
+						buf.WriteByte(' ')
+						buf.WriteString(text)
+					}
+				}
+			}
+
 			buf.WriteByte(' ')
 		case ast.NodeLinkDest:
 			buf.Write(n.Tokens)
diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go
new file mode 100644
index 000000000..8b2ee413a
--- /dev/null
+++ b/kernel/util/ocr.go
@@ -0,0 +1,133 @@
+// SiYuan - Build Your Eternal Digital Garden
+// Copyright (c) 2020-present, b3log.org
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+package util
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"os"
+	"os/exec"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/88250/gulu"
+	"github.com/dgraph-io/ristretto"
+	"github.com/siyuan-note/logging"
+)
+
+// tesseractEnabled records whether a usable tesseract binary was detected.
+// NOTE(review): written by initTesseract (started with `go` from Boot) and read by Tesseract without synchronization.
+var tesseractEnabled bool
+
+// ocrWhitespaceRegexp matches runs of whitespace in OCR output. It is compiled
+// once at package scope instead of on every Tesseract call.
+var ocrWhitespaceRegexp = regexp.MustCompile("\\s+")
+
+// ocrResultCache maps an image's absolute path to its recognized text.
+var ocrResultCache, _ = ristretto.NewCache(&ristretto.Config{
+	NumCounters: 100000,
+	MaxCost:     1000 * 1000 * 64,
+	BufferItems: 64,
+})
+
+// initTesseract probes for the tesseract binary and, when a version string is
+// reported, enables OCR and logs the detected version.
+func initTesseract() {
+	ver := getTesseractVer()
+	if "" == ver {
+		return
+	}
+
+	tesseractEnabled = true
+	logging.LogInfof("tesseract-ocr enabled [ver=%s]", ver)
+}
+
+// getTesseractVer runs `tesseract --version` and returns the first output line
+// with the leading "tesseract " removed and surrounding space trimmed. It returns
+// an empty string when the command fails or the output does not start with "tesseract v".
+func getTesseractVer() (ret string) {
+	cmd := exec.Command("tesseract", "--version")
+	gulu.CmdAttr(cmd)
+	data, err := cmd.CombinedOutput()
+	if nil != err || !bytes.HasPrefix(data, []byte("tesseract v")) {
+		return ""
+	}
+
+	firstLine := data
+	if idx := bytes.IndexByte(data, '\n'); 0 <= idx {
+		firstLine = data[:idx]
+	}
+	ret = strings.TrimPrefix(string(firstLine), "tesseract ")
+	return strings.TrimSpace(ret)
+}
+
+// Tesseract runs tesseract on the image at imgAbsPath and returns the recognized
+// text with all whitespace removed. Successful results are cached by path.
+//
+// It returns an empty string when Container is not ContainerStd, tesseract was
+// not detected, imgAbsPath cannot be stat'ed or is a directory, or the command
+// fails or exceeds the 2-second timeout.
+func Tesseract(imgAbsPath string) string {
+	if ContainerStd != Container || !tesseractEnabled {
+		return ""
+	}
+
+	info, err := os.Stat(imgAbsPath)
+	// A directory can never be recognized; skip it instead of spawning a process that is bound to fail.
+	if nil != err || info.IsDir() {
+		return ""
+	}
+
+	if cached, ok := ocrResultCache.Get(imgAbsPath); ok {
+		if text, isStr := cached.(string); isStr {
+			return text
+		}
+	}
+
+	defer logging.Recover()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", "chi_sim+eng")
+	gulu.CmdAttr(cmd)
+	// Capture stdout only so that diagnostics written to stderr are not mixed into the recognized text.
+	output, err := cmd.Output()
+	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
+		logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
+		return ""
+	}
+
+	if nil != err {
+		logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
+		return ""
+	}
+
+	// NOTE(review): this removes every whitespace run, so separate Latin words are joined together; confirm that is intended for `eng`.
+	ret := ocrWhitespaceRegexp.ReplaceAllString(string(output), "")
+	logging.LogInfof("tesseract [path=%s, size=%d]: %s", imgAbsPath, info.Size(), ret)
+
+	// Charge the cache for the text it actually stores, not for the size of the image file.
+	cost := int64(len(ret))
+	if cost < 1 {
+		cost = 1
+	}
+	ocrResultCache.Set(imgAbsPath, ret, cost)
+	return ret
+}
diff --git a/kernel/util/working.go b/kernel/util/working.go
index f49a8accc..4c6385a62 100644
--- a/kernel/util/working.go
+++ b/kernel/util/working.go
@@ -113,6 +113,7 @@ func Boot() {
 	initPathDir()
 
 	go initPandoc()
+	go initTesseract()
 
 	bootBanner := figure.NewColorFigure("SiYuan", "isometric3", "green", true)
 	logging.LogInfof("\n" + bootBanner.String())