🎨 大于 2MB 的图片默认不进行 OCR Fix https://github.com/siyuan-note/siyuan/issues/7333

This commit is contained in:
Liang Ding 2023-02-11 10:33:53 +08:00
parent 5e254500ef
commit 3dbc6d91ed
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
7 changed files with 263 additions and 26 deletions

View file

@ -4,7 +4,8 @@
"icon": "1f4d4",
"closed": false,
"refCreateSavePath": "",
"createDocNameTemplate": "",
"docCreateSavePath": "",
"dailyNoteSavePath": "/daily note/{{now | date \"2006/01\"}}/{{now | date \"2006-01-02\"}}",
"dailyNoteTemplatePath": ""
"dailyNoteTemplatePath": "",
"sortMode": 15
}

View file

@ -6,7 +6,7 @@
"id": "20200924100744-br924ar",
"title": "Assets",
"type": "doc",
"updated": "20230203183434"
"updated": "20230211103249"
},
"Children": [
{
@ -724,7 +724,7 @@
"ListData": {},
"Properties": {
"id": "20230202231731-bdh7lab",
"updated": "20230203183434"
"updated": "20230211103249"
},
"Children": [
{
@ -736,7 +736,7 @@
},
"Properties": {
"id": "20230202231732-n7z8jth",
"updated": "20230203183347"
"updated": "20230211103249"
},
"Children": [
{
@ -744,7 +744,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231732-f3jkj7p",
"updated": "20230203183347"
"updated": "20230211103249"
},
"Children": [
{
@ -791,7 +791,7 @@
},
"Properties": {
"id": "20230202231800-z8hswmk",
"updated": "20230203183434"
"updated": "20230211103154"
},
"Children": [
{
@ -799,7 +799,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231800-c3x45ky",
"updated": "20230203183434"
"updated": "20230211103154"
},
"Children": [
{
@ -881,7 +881,7 @@
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_LANGS=chi_sim+eng "
"TextMarkTextContent": "SIYUAN_TESSERACT_LANGS=chi_sim+eng"
},
{
"Type": "NodeText",
@ -890,6 +890,79 @@
]
}
]
},
{
"ID": "20230211102830-9azqf9m",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230211102830-9azqf9m"
},
"Children": [
{
"ID": "20230211102830-sbchex4",
"Type": "NodeParagraph",
"Properties": {
"id": "20230211102830-sbchex4",
"updated": "20230211102832"
},
"Children": [
{
"Type": "NodeText",
"Data": "Only images in png and jpg formats are supported"
}
]
}
]
},
{
"ID": "20230211102834-fx3o5su",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230211102834-fx3o5su",
"updated": "20230211102928"
},
"Children": [
{
"ID": "20230211102834-3jzjdrv",
"Type": "NodeParagraph",
"Properties": {
"id": "20230211102834-3jzjdrv",
"updated": "20230211102928"
},
"Children": [
{
"Type": "NodeText",
"Data": "By default, only images up to 2MB in size are processed. To change this limit, set the environment variable "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE"
},
{
"Type": "NodeText",
"Data": "; the value is in bytes, for example: "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE=4000000"
},
{
"Type": "NodeText",
"Data": " sets the upper limit to 4MB"
}
]
}
]
}
]
},

View file

@ -4,7 +4,8 @@
"icon": "1f4d4",
"closed": false,
"refCreateSavePath": "",
"createDocNameTemplate": "",
"docCreateSavePath": "",
"dailyNoteSavePath": "/daily note/{{now | date \"2006/01\"}}/{{now | date \"2006-01-02\"}}",
"dailyNoteTemplatePath": ""
"dailyNoteTemplatePath": "",
"sortMode": 15
}

View file

@ -6,7 +6,7 @@
"id": "20200915214115-42b8zma",
"title": "资源文件",
"type": "doc",
"updated": "20230203182839"
"updated": "20230211103308"
},
"Children": [
{
@ -750,7 +750,7 @@
"ListData": {},
"Properties": {
"id": "20230202231309-pcjl7c2",
"updated": "20230203182839"
"updated": "20230211103308"
},
"Children": [
{
@ -762,7 +762,7 @@
},
"Properties": {
"id": "20230202231311-7qdk1za",
"updated": "20230202231842"
"updated": "20230211103308"
},
"Children": [
{
@ -770,7 +770,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231311-n1pf7in",
"updated": "20230203182342"
"updated": "20230211103308"
},
"Children": [
{
@ -817,7 +817,7 @@
},
"Properties": {
"id": "20230202231321-q1b1tza",
"updated": "20230203182839"
"updated": "20230211103207"
},
"Children": [
{
@ -825,7 +825,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231321-5ugmgf0",
"updated": "20230203182839"
"updated": "20230211103207"
},
"Children": [
{
@ -916,6 +916,80 @@
]
}
]
},
{
"ID": "20230211102440-0qik4dd",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230211102440-0qik4dd",
"updated": "20230211102642"
},
"Children": [
{
"ID": "20230211102440-09cmf75",
"Type": "NodeParagraph",
"Properties": {
"id": "20230211102440-09cmf75",
"updated": "20230211102642"
},
"Children": [
{
"Type": "NodeText",
"Data": "仅支持 png 和 jpg 格式的图片"
}
]
}
]
},
{
"ID": "20230211102601-ifl3ojm",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230211102601-ifl3ojm",
"updated": "20230211102744"
},
"Children": [
{
"ID": "20230211102601-npe6hvh",
"Type": "NodeParagraph",
"Properties": {
"id": "20230211102601-npe6hvh",
"updated": "20230211102744"
},
"Children": [
{
"Type": "NodeText",
"Data": "默认只对 2MB 以下的图片进行处理,如果需要调整,可以通过环境变量 "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE"
},
{
"Type": "NodeText",
"Data": " 设置,值的单位是字节,比如:"
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE=4000000"
},
{
"Type": "NodeText",
"Data": " 将上限调整为 4MB"
}
]
}
]
}
]
},

View file

@ -4,7 +4,8 @@
"icon": "1f4d4",
"closed": false,
"refCreateSavePath": "",
"createDocNameTemplate": "",
"docCreateSavePath": "",
"dailyNoteSavePath": "/daily note/{{now | date \"2006/01\"}}/{{now | date \"2006-01-02\"}}",
"dailyNoteTemplatePath": ""
"dailyNoteTemplatePath": "",
"sortMode": 15
}

View file

@ -5,7 +5,7 @@
"Properties": {
"id": "20211226123038-4umgpxy",
"title": "資料文件",
"updated": "20230203183310"
"updated": "20230211103259"
},
"Children": [
{
@ -729,7 +729,7 @@
"ListData": {},
"Properties": {
"id": "20230202231516-o6k9mj1",
"updated": "20230203183310"
"updated": "20230211103259"
},
"Children": [
{
@ -741,7 +741,7 @@
},
"Properties": {
"id": "20230202231516-pwj2ndg",
"updated": "20230203183210"
"updated": "20230211103259"
},
"Children": [
{
@ -749,7 +749,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231516-8trf08t",
"updated": "20230203183210"
"updated": "20230211103259"
},
"Children": [
{
@ -796,7 +796,7 @@
},
"Properties": {
"id": "20230202231519-x47s7he",
"updated": "20230203183310"
"updated": "20230211103203"
},
"Children": [
{
@ -804,7 +804,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231519-04f6dh6",
"updated": "20230203183310"
"updated": "20230211103203"
},
"Children": [
{
@ -895,6 +895,79 @@
]
}
]
},
{
"ID": "20230211102853-w8ykvqx",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230211102853-w8ykvqx"
},
"Children": [
{
"ID": "20230211102853-4lc4az0",
"Type": "NodeParagraph",
"Properties": {
"id": "20230211102853-4lc4az0",
"updated": "20230211102856"
},
"Children": [
{
"Type": "NodeText",
"Data": "僅支持 png 和 jpg 格式的圖片"
}
]
}
]
},
{
"ID": "20230211102858-0lgz6pc",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230211102858-0lgz6pc",
"updated": "20230211102910"
},
"Children": [
{
"ID": "20230211102858-mfhvy7x",
"Type": "NodeParagraph",
"Properties": {
"id": "20230211102858-mfhvy7x",
"updated": "20230211102910"
},
"Children": [
{
"Type": "NodeText",
"Data": "默認只對 2MB 以下的圖片進行處理,如果需要調整,可以通過環境變量 "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE"
},
{
"Type": "NodeText",
"Data": " 設置,值的單位是字節,比如:"
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE=4000000"
},
{
"Type": "NodeText",
"Data": " 將上限調整為 4MB"
}
]
}
]
}
]
},

View file

@ -23,17 +23,20 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/88250/gulu"
"github.com/dustin/go-humanize"
"github.com/siyuan-note/logging"
)
var (
TesseractBin = "tesseract"
TesseractEnabled bool
TesseractMaxSize = 2 * 1000 * uint64(1000)
AssetsTexts = map[string]string{}
AssetsTextsLock = sync.Mutex{}
AssetsTextsChanged = false
@ -85,6 +88,10 @@ func Tesseract(imgAbsPath string) string {
return ""
}
if TesseractMaxSize < uint64(info.Size()) {
return ""
}
defer logging.Recover()
ctx, cancel := context.WithTimeout(context.Background(), 7*time.Second)
@ -124,8 +131,15 @@ func initTesseract() {
return
}
maxSizeVal := os.Getenv("SIYUAN_TESSERACT_MAX_SIZE")
if "" != maxSizeVal {
if maxSize, parseErr := strconv.ParseUint(maxSizeVal, 10, 64); nil == parseErr {
TesseractMaxSize = maxSize
}
}
TesseractLangs = filterTesseractLangs(langs)
logging.LogInfof("tesseract-ocr enabled [ver=%s, langs=%s]", ver, strings.Join(TesseractLangs, "+"))
logging.LogInfof("tesseract-ocr enabled [ver=%s, maxSize=%s, langs=%s]", ver, humanize.Bytes(TesseractMaxSize), strings.Join(TesseractLangs, "+"))
}
func filterTesseractLangs(langs []string) (ret []string) {