mirror of
https://github.com/siyuan-note/siyuan.git
synced 2025-12-17 23:20:13 +01:00
🎨 改进图片 OCR 提取文本结果中的多余字符 Fix https://github.com/siyuan-note/siyuan/issues/7109
This commit is contained in:
parent
2ffec98b71
commit
519f015498
2 changed files with 30 additions and 3 deletions
|
|
@ -17,7 +17,9 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/88250/lute/html"
|
||||
)
|
||||
|
|
@ -39,3 +41,30 @@ func Reverse(s string) string {
|
|||
}
|
||||
return string(runes)
|
||||
}
|
||||
|
||||
// RemoveRedundantSpace normalizes the whitespace in str and returns the result.
//
// Every run of consecutive whitespace runes (as reported by unicode.IsSpace,
// so newlines and tabs are included) is collapsed into a single ASCII space.
// A whitespace run that directly follows a Han character is dropped entirely.
// Whitespace that precedes a Han character but follows a non-Han rune is kept
// as a single space. Leading and trailing whitespace is not trimmed; it is
// only collapsed by the same rules.
func RemoveRedundantSpace(str string) string {
	var buf bytes.Buffer
	// State describing the previously processed rune. Skipped whitespace
	// does not update it, so a whole run after a Han character is dropped.
	prevIsHan := false
	prevIsSpace := false
	for _, r := range str {
		if !unicode.IsSpace(r) {
			buf.WriteRune(r)
			prevIsHan = unicode.Is(unicode.Han, r)
			prevIsSpace = false
			continue
		}

		// Whitespace: drop it after a Han character or after a space that
		// has already been emitted for the current run.
		if prevIsHan || prevIsSpace {
			continue
		}
		// prevIsHan is necessarily false here, so only the space flag changes.
		buf.WriteRune(' ')
		prevIsSpace = true
	}
	return buf.String()
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue