🎨 桌面端支持搜索图片 OCR 文本 https://github.com/siyuan-note/siyuan/issues/3470

This commit is contained in:
Liang Ding 2023-01-16 21:00:52 +08:00
parent 77560d6601
commit c50f231d81
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
3 changed files with 51 additions and 3 deletions

View file

@ -38,6 +38,7 @@ import (
var (
tesseractEnabled bool
tesseractLangs []string
assetsTexts = map[string]string{}
assetsTextsLock = sync.Mutex{}
assetsTextsChanged = false
@ -77,7 +78,7 @@ func Tesseract(imgAbsPath string) string {
defer cancel()
now := time.Now()
cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", "chi_sim+eng")
cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", strings.Join(tesseractLangs, "+"))
gulu.CmdAttr(cmd)
output, err := cmd.CombinedOutput()
if ctx.Err() == context.DeadlineExceeded {
@ -281,6 +282,12 @@ func initTesseract() {
return
}
tesseractLangs = getTesseractLangs()
if 1 > len(tesseractLangs) {
logging.LogWarnf("no tesseract langs found")
tesseractEnabled = false
return
}
logging.LogInfof("tesseract-ocr enabled [ver=%s]", ver)
}
@ -303,3 +310,29 @@ func getTesseractVer() (ret string) {
}
return
}
// getTesseractLangs returns the language codes listed by `tesseract --list-langs`.
//
// It returns nil when tesseract is not enabled or when running the command
// fails. Otherwise every non-blank line that follows the listing header is
// returned with surrounding whitespace trimmed.
func getTesseractLangs() (ret []string) {
	if !tesseractEnabled {
		return nil
	}

	cmd := exec.Command("tesseract", "--list-langs")
	gulu.CmdAttr(cmd)
	data, err := cmd.CombinedOutput()
	if nil != err {
		return nil
	}

	lines := bytes.Split(data, []byte("\n"))

	// CombinedOutput interleaves stderr with stdout, so diagnostics may appear
	// before the header. Locate the header by its text instead of assuming it
	// is the first line; otherwise the header itself would be returned as a
	// language code. If no header is recognised, fall back to skipping only the
	// first line. bytes.Split with a non-empty separator always yields at least
	// one element, so lines[1:] cannot go out of range.
	// NOTE(review): the header prefix matches current tesseract output
	// ("List of available languages ..."); confirm against the versions supported.
	start := 1
	for i, line := range lines {
		if bytes.HasPrefix(bytes.TrimSpace(line), []byte("List of available languages")) {
			start = i + 1
			break
		}
	}

	for _, line := range lines[start:] {
		// TrimSpace also strips the trailing "\r" left by CRLF line endings.
		line = bytes.TrimSpace(line)
		if 0 == len(line) {
			continue
		}
		ret = append(ret, string(line))
	}
	return
}