mirror of
https://github.com/siyuan-note/siyuan.git
synced 2026-01-01 06:18:49 +01:00
🎨 改进图片 OCR 提取文本结果中的多余字符 Fix https://github.com/siyuan-note/siyuan/issues/7109
This commit is contained in:
parent
2ffec98b71
commit
519f015498
2 changed files with 30 additions and 3 deletions
|
|
@@ -23,7 +23,6 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
|
@@ -90,8 +89,7 @@ func Tesseract(imgAbsPath string) string {
|
|||
|
||||
ret := string(output)
|
||||
ret = gulu.Str.RemoveInvisible(ret)
|
||||
reg := regexp.MustCompile("\\s{2,}")
|
||||
ret = reg.ReplaceAllString(ret, " ")
|
||||
ret = RemoveRedundantSpace(ret)
|
||||
msg := fmt.Sprintf("OCR [%s] [%s]", info.Name(), ret)
|
||||
PushStatusBar(msg)
|
||||
return ret
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue