From efec2c749e2872b911947666b9561f6d940004ea Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Fri, 10 Nov 2023 11:15:15 +0800 Subject: [PATCH 1/3] :art: Supports via environment var `SIYUAN_TESSERACT_ENABLED=false` to close OCR Fix https://github.com/siyuan-note/siyuan/issues/9619 --- kernel/util/tesseract.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go index 0c2e0f72b..17d50ac25 100644 --- a/kernel/util/tesseract.go +++ b/kernel/util/tesseract.go @@ -149,6 +149,17 @@ func InitTesseract() { } } + // Supports via environment var `SIYUAN_TESSERACT_ENABLED=false` to close OCR https://github.com/siyuan-note/siyuan/issues/9619 + if enabled := os.Getenv("SIYUAN_TESSERACT_ENABLED"); "" != enabled { + if enabledBool, parseErr := strconv.ParseBool(enabled); nil == parseErr { + TesseractEnabled = enabledBool + if !enabledBool { + logging.LogInfof("tesseract-ocr disabled by env") + return + } + } + } + TesseractLangs = filterTesseractLangs(langs) logging.LogInfof("tesseract-ocr enabled [ver=%s, maxSize=%s, langs=%s]", ver, humanize.Bytes(TesseractMaxSize), strings.Join(TesseractLangs, "+")) } From 524776c0056c7f92967b88da39c8cc826653c84d Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Fri, 10 Nov 2023 11:23:27 +0800 Subject: [PATCH 2/3] :art: Supports via environment var `SIYUAN_TESSERACT_ENABLED=false` to close OCR Fix https://github.com/siyuan-note/siyuan/issues/9619 --- kernel/model/ocr.go | 5 +++-- kernel/util/tesseract.go | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/model/ocr.go b/kernel/model/ocr.go index 8acb61f8c..067a32232 100644 --- a/kernel/model/ocr.go +++ b/kernel/model/ocr.go @@ -37,8 +37,9 @@ func autoOCRAssets() { util.AssetsTextsLock.Lock() util.AssetsTexts[p] = text util.AssetsTextsLock.Unlock() - util.AssetsTextsChanged = true - + if "" != text { + util.AssetsTextsChanged = true + } if 4 <= i { // 一次任务中最多处理 4 张图片,防止卡顿 break } diff 
--git a/kernel/util/tesseract.go b/kernel/util/tesseract.go index 17d50ac25..589fd9b3c 100644 --- a/kernel/util/tesseract.go +++ b/kernel/util/tesseract.go @@ -69,7 +69,9 @@ func GetAssetText(asset string, force bool) string { AssetsTextsLock.Lock() AssetsTexts[asset] = ret AssetsTextsLock.Unlock() - AssetsTextsChanged = true + if "" != ret { + AssetsTextsChanged = true + } return ret } From 29f8f0fe85910934325c4944b2b8aa85d2c06041 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Fri, 10 Nov 2023 11:30:23 +0800 Subject: [PATCH 3/3] :art: Supports via environment var `SIYUAN_TESSERACT_ENABLED=false` to close OCR Fix https://github.com/siyuan-note/siyuan/issues/9619 --- .../20200924100744-br924ar.sy | 47 ++++++++++++------- .../20200915214115-42b8zma.sy | 17 +++++-- .../20211226123038-4umgpxy.sy | 17 +++++-- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20210808180303-xaduj2o/20200924100744-br924ar.sy b/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20210808180303-xaduj2o/20200924100744-br924ar.sy index ec432eadb..93908c220 100644 --- a/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20210808180303-xaduj2o/20200924100744-br924ar.sy +++ b/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20210808180303-xaduj2o/20200924100744-br924ar.sy @@ -6,7 +6,7 @@ "id": "20200924100744-br924ar", "title": "Assets", "type": "doc", - "updated": "20230820185510" + "updated": "20231110112758" }, "Children": [ { @@ -856,7 +856,7 @@ "ListData": {}, "Properties": { "id": "20230202231731-bdh7lab", - "updated": "20230211103249" + "updated": "20231110112758" }, "Children": [ { @@ -868,7 +868,7 @@ }, "Properties": { "id": "20230202231732-n7z8jth", - "updated": "20230211103249" + "updated": "20231110112758" }, "Children": [ { @@ -876,7 +876,7 @@ "Type": "NodeParagraph", "Properties": { "id": "20230202231732-f3jkj7p", - "updated": "20230211103249" + "updated": "20231110112758" }, 
"Children": [ { @@ -908,7 +908,16 @@ }, { "Type": "NodeText", - "Data": "​ in the kernel boot log" + "Data": "​ in the kernel boot log; if you want to disable OCR, set the environment variable " + }, + { + "Type": "NodeTextMark", + "TextMarkType": "code", + "TextMarkTextContent": "SIYUAN_TESSERACT_ENABLED=false" + }, + { + "Type": "NodeText", + "Data": "​" } ] } @@ -936,7 +945,11 @@ "Children": [ { "Type": "NodeText", - "Data": "SiYuan does not enable all installed language packs, because too many language packs will cause OCR to be slow or even timeout and return empty results, and take up too many system resources, so by default only " + "Data": "SiYuan" + }, + { + "Type": "NodeText", + "Data": " does not enable all installed language packs, because too many language packs will cause OCR to be slow or even timeout and return empty results, and take up too many system resources, so by default only " }, { "Type": "NodeTextMark", @@ -945,7 +958,7 @@ }, { "Type": "NodeText", - "Data": "​, " + "Data": "​​, " }, { "Type": "NodeTextMark", @@ -954,7 +967,7 @@ }, { "Type": "NodeText", - "Data": "​, " + "Data": "​​, " }, { "Type": "NodeTextMark", @@ -963,7 +976,7 @@ }, { "Type": "NodeText", - "Data": "​, " + "Data": "​​, " }, { "Type": "NodeTextMark", @@ -972,7 +985,7 @@ }, { "Type": "NodeText", - "Data": "​, " + "Data": "​​, " }, { "Type": "NodeTextMark", @@ -981,7 +994,7 @@ }, { "Type": "NodeText", - "Data": "​, " + "Data": "​​, " }, { "Type": "NodeTextMark", @@ -990,7 +1003,7 @@ }, { "Type": "NodeText", - "Data": "​ and " + "Data": "​​ and " }, { "Type": "NodeTextMark", @@ -999,7 +1012,7 @@ }, { "Type": "NodeText", - "Data": "​ will be enabled at most, if you need more accurate language support, you can specify the language pack by configuring the environment variable " + "Data": "​​ will be enabled at most, if you need more accurate language support, you can specify the language pack by configuring the environment variable " }, { "Type": "NodeTextMark", @@ -1008,7
+1021,7 @@ }, { "Type": "NodeText", - "Data": "​, such as " + "Data": "​​, such as " }, { "Type": "NodeTextMark", @@ -1017,7 +1030,7 @@ }, { "Type": "NodeText", - "Data": "​" + "Data": "​​" } ] } @@ -1081,7 +1094,7 @@ }, { "Type": "NodeText", - "Data": "​, the unit of value is bytes, for example: " + "Data": "​​, the unit of value is bytes, for example: " }, { "Type": "NodeTextMark", @@ -1090,7 +1103,7 @@ }, { "Type": "NodeText", - "Data": "​ adjust the upper limit to 4MB" + "Data": "​​ adjust the upper limit to 4MB" } ] } diff --git a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20210808180321-hbvl5c2/20200915214115-42b8zma.sy b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20210808180321-hbvl5c2/20200915214115-42b8zma.sy index 1cf18f7b8..60566546e 100644 --- a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20210808180321-hbvl5c2/20200915214115-42b8zma.sy +++ b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20210808180321-hbvl5c2/20200915214115-42b8zma.sy @@ -6,7 +6,7 @@ "id": "20200915214115-42b8zma", "title": "资源文件", "type": "doc", - "updated": "20230820185522" + "updated": "20231110112936" }, "Children": [ { @@ -878,7 +878,7 @@ "ListData": {}, "Properties": { "id": "20230202231309-pcjl7c2", - "updated": "20230211103308" + "updated": "20231110112936" }, "Children": [ { @@ -890,7 +890,7 @@ }, "Properties": { "id": "20230202231311-7qdk1za", - "updated": "20230211103308" + "updated": "20231110112936" }, "Children": [ { @@ -898,7 +898,7 @@ "Type": "NodeParagraph", "Properties": { "id": "20230202231311-n1pf7in", - "updated": "20230211103308" + "updated": "20231110112936" }, "Children": [ { @@ -928,6 +928,15 @@ "TextMarkType": "code", "TextMarkTextContent": "tesseract-ocr enabled" }, + { + "Type": "NodeText", + "Data": "​;如果要禁用 OCR,请设置环境变量 " + }, + { + "Type": "NodeTextMark", + "TextMarkType": "code", + "TextMarkTextContent": "SIYUAN_TESSERACT_ENABLED=false" + }, { "Type": "NodeText", "Data": "​" diff --git 
a/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20211226121203-rjjngpz/20211226123038-4umgpxy.sy b/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20211226121203-rjjngpz/20211226123038-4umgpxy.sy index 34fe63580..06f097845 100644 --- a/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20211226121203-rjjngpz/20211226123038-4umgpxy.sy +++ b/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20211226121203-rjjngpz/20211226123038-4umgpxy.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20211226123038-4umgpxy", "title": "資料文件", - "updated": "20230820185516" + "updated": "20231110112927" }, "Children": [ { @@ -861,7 +861,7 @@ "ListData": {}, "Properties": { "id": "20230202231516-o6k9mj1", - "updated": "20230211103259" + "updated": "20231110112927" }, "Children": [ { @@ -873,7 +873,7 @@ }, "Properties": { "id": "20230202231516-pwj2ndg", - "updated": "20230211103259" + "updated": "20231110112927" }, "Children": [ { @@ -881,7 +881,7 @@ "Type": "NodeParagraph", "Properties": { "id": "20230202231516-8trf08t", - "updated": "20230211103259" + "updated": "20231110112927" }, "Children": [ { @@ -911,6 +911,15 @@ "TextMarkType": "code", "TextMarkTextContent": "tesseract-ocr enabled" }, + { + "Type": "NodeText", + "Data": "​;如果要停用 OCR,請設定環境變量 " + }, + { + "Type": "NodeTextMark", + "TextMarkType": "code", + "TextMarkTextContent": "SIYUAN_TESSERACT_ENABLED=false" + }, { "Type": "NodeText", "Data": "​"