🎨 改进图片 OCR 提取文本结果中的多余字符 Fix https://github.com/siyuan-note/siyuan/issues/7109

This commit is contained in:
Liang Ding 2023-01-18 11:46:51 +08:00
parent 2ffec98b71
commit 519f015498
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
2 changed files with 30 additions and 3 deletions

View file

@@ -23,7 +23,6 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
@@ -90,8 +89,7 @@ func Tesseract(imgAbsPath string) string {
ret := string(output)
ret = gulu.Str.RemoveInvisible(ret)
reg := regexp.MustCompile("\\s{2,}")
ret = reg.ReplaceAllString(ret, " ")
ret = RemoveRedundantSpace(ret)
msg := fmt.Sprintf("OCR [%s] [%s]", info.Name(), ret)
PushStatusBar(msg)
return ret