mirror of
https://github.com/siyuan-note/siyuan.git
synced 2025-12-16 14:40:12 +01:00
Kernel API OCR returns text coordinate information (#11738)
* 优化 setImageOCRText 接口调用
* 扩展 ocr 接口,添加 ocrJSON 返回字段
* 过滤不可见字符
* 返回的 ocr 文本添加空格
This commit is contained in:
parent
974f1c1183
commit
c0bd645048
5 changed files with 65 additions and 34 deletions
|
|
@ -1019,7 +1019,9 @@ export const imgMenu = (protyle: IProtyle, range: Range, assetElement: HTMLEleme
|
||||||
fetchPost("/api/asset/getImageOCRText", {
|
fetchPost("/api/asset/getImageOCRText", {
|
||||||
path: imgElement.getAttribute("src")
|
path: imgElement.getAttribute("src")
|
||||||
}, (response) => {
|
}, (response) => {
|
||||||
element.querySelector("textarea").value = response.data.text;
|
const textarea =element.querySelector("textarea")
|
||||||
|
textarea.value = response.data.text;
|
||||||
|
textarea.dataset.ocrText = response.data.text;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
|
@ -1031,11 +1033,6 @@ export const imgMenu = (protyle: IProtyle, range: Range, assetElement: HTMLEleme
|
||||||
fetchPost("/api/asset/ocr", {
|
fetchPost("/api/asset/ocr", {
|
||||||
path: imgElement.getAttribute("src"),
|
path: imgElement.getAttribute("src"),
|
||||||
force: true
|
force: true
|
||||||
}, (response) => {
|
|
||||||
fetchPost("/api/asset/setImageOCRText", {
|
|
||||||
path: imgElement.getAttribute("src"),
|
|
||||||
text: response.data.text
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
|
|
@ -1119,6 +1116,13 @@ export const imgMenu = (protyle: IProtyle, range: Range, assetElement: HTMLEleme
|
||||||
const textElements = window.siyuan.menus.menu.element.querySelectorAll("textarea");
|
const textElements = window.siyuan.menus.menu.element.querySelectorAll("textarea");
|
||||||
textElements[0].focus();
|
textElements[0].focus();
|
||||||
window.siyuan.menus.menu.removeCB = () => {
|
window.siyuan.menus.menu.removeCB = () => {
|
||||||
|
const ocrElement = window.siyuan.menus.menu.element.querySelector('[data-type="ocr"]') as HTMLTextAreaElement;
|
||||||
|
if (ocrElement && ocrElement.dataset.ocrText !== ocrElement.value) {
|
||||||
|
fetchPost("/api/asset/setImageOCRText", {
|
||||||
|
path: imgElement.getAttribute("src"),
|
||||||
|
text: ocrElement.value
|
||||||
|
});
|
||||||
|
}
|
||||||
imgElement.setAttribute("alt", textElements[2].value.replace(/\n|\r\n|\r|\u2028|\u2029/g, ""));
|
imgElement.setAttribute("alt", textElements[2].value.replace(/\n|\r\n|\r|\u2028|\u2029/g, ""));
|
||||||
nodeElement.setAttribute("updated", dayjs().format("YYYYMMDDHHmmss"));
|
nodeElement.setAttribute("updated", dayjs().format("YYYYMMDDHHmmss"));
|
||||||
updateTransaction(protyle, id, nodeElement.outerHTML, html);
|
updateTransaction(protyle, id, nodeElement.outerHTML, html);
|
||||||
|
|
|
||||||
|
|
@ -137,13 +137,11 @@ func ocr(c *gin.Context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
path := arg["path"].(string)
|
path := arg["path"].(string)
|
||||||
force := false
|
|
||||||
if forceArg := arg["force"]; nil != forceArg {
|
|
||||||
force = forceArg.(bool)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
ocrJSON := util.OcrAsset(path)
|
||||||
ret.Data = map[string]interface{}{
|
ret.Data = map[string]interface{}{
|
||||||
"text": util.OcrAsset(path, force),
|
"text": util.GetOcrJsonText(ocrJSON),
|
||||||
|
"ocrJSON": ocrJSON,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ func autoOCRAssets() {
|
||||||
assets := getUnOCRAssetsAbsPaths()
|
assets := getUnOCRAssetsAbsPaths()
|
||||||
if 0 < len(assets) {
|
if 0 < len(assets) {
|
||||||
for i, assetAbsPath := range assets {
|
for i, assetAbsPath := range assets {
|
||||||
text := util.Tesseract(assetAbsPath)
|
text := util.GetOcrJsonText(util.Tesseract(assetAbsPath))
|
||||||
p := strings.TrimPrefix(assetAbsPath, assetsPath)
|
p := strings.TrimPrefix(assetAbsPath, assetsPath)
|
||||||
p = "assets" + filepath.ToSlash(p)
|
p = "assets" + filepath.ToSlash(p)
|
||||||
util.SetAssetText(p, text)
|
util.SetAssetText(p, text)
|
||||||
|
|
|
||||||
|
|
@ -198,7 +198,7 @@ func nodeStaticContent(node *ast.Node, excludeTypes []string, includeTextMarkATi
|
||||||
var linkDestStr, ocrText string
|
var linkDestStr, ocrText string
|
||||||
if nil != linkDest {
|
if nil != linkDest {
|
||||||
linkDestStr = linkDest.TokensStr()
|
linkDestStr = linkDest.TokensStr()
|
||||||
ocrText = util.OcrAsset(linkDestStr, false)
|
ocrText = util.GetAssetText(linkDestStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
linkText := n.ChildByType(ast.NodeLinkText)
|
linkText := n.ChildByType(ast.NodeLinkText)
|
||||||
|
|
|
||||||
|
|
@ -149,22 +149,16 @@ func ExistsAssetText(asset string) (ret bool) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func OcrAsset(asset string, force bool) (ret string) {
|
func OcrAsset(asset string) (ret []map[string]interface{}) {
|
||||||
if !force {
|
|
||||||
assetsTextsLock.Lock()
|
|
||||||
ret = assetsTexts[asset]
|
|
||||||
assetsTextsLock.Unlock()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
assetsPath := GetDataAssetsAbsPath()
|
assetsPath := GetDataAssetsAbsPath()
|
||||||
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
||||||
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
||||||
ret = Tesseract(assetAbsPath)
|
ret = Tesseract(assetAbsPath)
|
||||||
assetsTextsLock.Lock()
|
assetsTextsLock.Lock()
|
||||||
assetsTexts[asset] = ret
|
ocrText := GetOcrJsonText(ret)
|
||||||
|
assetsTexts[asset] = ocrText
|
||||||
assetsTextsLock.Unlock()
|
assetsTextsLock.Unlock()
|
||||||
if "" != ret {
|
if "" != ocrText {
|
||||||
assetsTextsChanged.Store(true)
|
assetsTextsChanged.Store(true)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
|
@ -184,9 +178,9 @@ func IsTesseractExtractable(p string) bool {
|
||||||
// tesseractOCRLock 用于 Tesseract OCR 加锁串行执行提升稳定性 https://github.com/siyuan-note/siyuan/issues/7265
|
// tesseractOCRLock 用于 Tesseract OCR 加锁串行执行提升稳定性 https://github.com/siyuan-note/siyuan/issues/7265
|
||||||
var tesseractOCRLock = sync.Mutex{}
|
var tesseractOCRLock = sync.Mutex{}
|
||||||
|
|
||||||
func Tesseract(imgAbsPath string) string {
|
func Tesseract(imgAbsPath string) (ret []map[string]interface{}) {
|
||||||
if ContainerStd != Container || !TesseractEnabled {
|
if ContainerStd != Container || !TesseractEnabled {
|
||||||
return ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
defer logging.Recover()
|
defer logging.Recover()
|
||||||
|
|
@ -194,16 +188,16 @@ func Tesseract(imgAbsPath string) string {
|
||||||
defer tesseractOCRLock.Unlock()
|
defer tesseractOCRLock.Unlock()
|
||||||
|
|
||||||
if !IsTesseractExtractable(imgAbsPath) {
|
if !IsTesseractExtractable(imgAbsPath) {
|
||||||
return ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
info, err := os.Stat(imgAbsPath)
|
info, err := os.Stat(imgAbsPath)
|
||||||
if nil != err {
|
if nil != err {
|
||||||
return ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if TesseractMaxSize < uint64(info.Size()) {
|
if TesseractMaxSize < uint64(info.Size()) {
|
||||||
return ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
defer logging.Recover()
|
defer logging.Recover()
|
||||||
|
|
@ -211,24 +205,59 @@ func Tesseract(imgAbsPath string) string {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 7*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 7*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
cmd := exec.CommandContext(ctx, TesseractBin, "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", strings.Join(TesseractLangs, "+"))
|
cmd := exec.CommandContext(ctx, TesseractBin, "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", strings.Join(TesseractLangs, "+"), "tsv")
|
||||||
gulu.CmdAttr(cmd)
|
gulu.CmdAttr(cmd)
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
if ctx.Err() == context.DeadlineExceeded {
|
if ctx.Err() == context.DeadlineExceeded {
|
||||||
logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
|
logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
|
||||||
return ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if nil != err {
|
if nil != err {
|
||||||
logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
|
logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
|
||||||
return ""
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ret := string(output)
|
tsv := string(output)
|
||||||
|
|
||||||
|
// 按行分割 TSV 数据
|
||||||
|
lines := strings.Split(tsv, "\r\n")
|
||||||
|
|
||||||
|
// 解析 TSV 数据 跳过标题行,从第二行开始处理
|
||||||
|
for _, line := range lines[1:] {
|
||||||
|
if line == "" {
|
||||||
|
continue // 跳过空行
|
||||||
|
}
|
||||||
|
// 分割每列数据
|
||||||
|
fields := strings.Split(line, "\t")
|
||||||
|
// 将字段名和字段值映射到一个 map 中
|
||||||
|
dataMap := make(map[string]interface{})
|
||||||
|
for i, header := range strings.Split(lines[0], "\t") {
|
||||||
|
dataMap[header] = fields[i]
|
||||||
|
}
|
||||||
|
ret = append(ret, dataMap)
|
||||||
|
}
|
||||||
|
|
||||||
|
tsv = gulu.Str.RemoveInvisible(tsv)
|
||||||
|
tsv = RemoveRedundantSpace(tsv)
|
||||||
|
msg := fmt.Sprintf("OCR [%s] [%s]", html.EscapeString(info.Name()), html.EscapeString(GetOcrJsonText(ret)))
|
||||||
|
PushStatusBar(msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// 提取并连接所有 text 字段的函数
|
||||||
|
func GetOcrJsonText(jsonData []map[string]interface{}) (ret string) {
|
||||||
|
for _, dataMap := range jsonData {
|
||||||
|
// 检查 text 字段是否存在
|
||||||
|
if text, ok := dataMap["text"]; ok {
|
||||||
|
// 确保 text 是字符串类型
|
||||||
|
if textStr, ok := text.(string); ok {
|
||||||
|
ret += " " + textStr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
ret = gulu.Str.RemoveInvisible(ret)
|
ret = gulu.Str.RemoveInvisible(ret)
|
||||||
ret = RemoveRedundantSpace(ret)
|
ret = RemoveRedundantSpace(ret)
|
||||||
msg := fmt.Sprintf("OCR [%s] [%s]", html.EscapeString(info.Name()), html.EscapeString(ret))
|
|
||||||
PushStatusBar(msg)
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue