mirror of
https://github.com/siyuan-note/siyuan.git
synced 2026-01-06 00:38:49 +01:00
🎨 大于 2MB 的图片默认不进行 OCR Fix https://github.com/siyuan-note/siyuan/issues/7333
This commit is contained in:
parent
5e254500ef
commit
3dbc6d91ed
7 changed files with 263 additions and 26 deletions
|
|
@ -4,7 +4,8 @@
|
|||
"icon": "1f4d4",
|
||||
"closed": false,
|
||||
"refCreateSavePath": "",
|
||||
"createDocNameTemplate": "",
|
||||
"docCreateSavePath": "",
|
||||
"dailyNoteSavePath": "/daily note/{{now | date \"2006/01\"}}/{{now | date \"2006-01-02\"}}",
|
||||
"dailyNoteTemplatePath": ""
|
||||
"dailyNoteTemplatePath": "",
|
||||
"sortMode": 15
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
"id": "20200924100744-br924ar",
|
||||
"title": "Assets",
|
||||
"type": "doc",
|
||||
"updated": "20230203183434"
|
||||
"updated": "20230211103249"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -724,7 +724,7 @@
|
|||
"ListData": {},
|
||||
"Properties": {
|
||||
"id": "20230202231731-bdh7lab",
|
||||
"updated": "20230203183434"
|
||||
"updated": "20230211103249"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -736,7 +736,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230202231732-n7z8jth",
|
||||
"updated": "20230203183347"
|
||||
"updated": "20230211103249"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -744,7 +744,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230202231732-f3jkj7p",
|
||||
"updated": "20230203183347"
|
||||
"updated": "20230211103249"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -791,7 +791,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230202231800-z8hswmk",
|
||||
"updated": "20230203183434"
|
||||
"updated": "20230211103154"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -799,7 +799,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230202231800-c3x45ky",
|
||||
"updated": "20230203183434"
|
||||
"updated": "20230211103154"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -881,7 +881,7 @@
|
|||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_LANGS=chi_sim+eng "
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_LANGS=chi_sim+eng"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
|
|
@ -890,6 +890,79 @@
|
|||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "20230211102830-9azqf9m",
|
||||
"Type": "NodeListItem",
|
||||
"ListData": {
|
||||
"BulletChar": 42,
|
||||
"Marker": "Kg=="
|
||||
},
|
||||
"Properties": {
|
||||
"id": "20230211102830-9azqf9m"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"ID": "20230211102830-sbchex4",
|
||||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230211102830-sbchex4",
|
||||
"updated": "20230211102832"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": "Only images in png and jpg formats are supported"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "20230211102834-fx3o5su",
|
||||
"Type": "NodeListItem",
|
||||
"ListData": {
|
||||
"BulletChar": 42,
|
||||
"Marker": "Kg=="
|
||||
},
|
||||
"Properties": {
|
||||
"id": "20230211102834-fx3o5su",
|
||||
"updated": "20230211102928"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"ID": "20230211102834-3jzjdrv",
|
||||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230211102834-3jzjdrv",
|
||||
"updated": "20230211102928"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": "By default, only images smaller than 2MB are processed. To change this limit, set the environment variable "
|
||||
},
|
||||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": ", whose value is given in bytes, for example: "
|
||||
},
|
||||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE=4000000"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": " raises the upper limit to 4MB"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
"icon": "1f4d4",
|
||||
"closed": false,
|
||||
"refCreateSavePath": "",
|
||||
"createDocNameTemplate": "",
|
||||
"docCreateSavePath": "",
|
||||
"dailyNoteSavePath": "/daily note/{{now | date \"2006/01\"}}/{{now | date \"2006-01-02\"}}",
|
||||
"dailyNoteTemplatePath": ""
|
||||
"dailyNoteTemplatePath": "",
|
||||
"sortMode": 15
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
"id": "20200915214115-42b8zma",
|
||||
"title": "资源文件",
|
||||
"type": "doc",
|
||||
"updated": "20230203182839"
|
||||
"updated": "20230211103308"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -750,7 +750,7 @@
|
|||
"ListData": {},
|
||||
"Properties": {
|
||||
"id": "20230202231309-pcjl7c2",
|
||||
"updated": "20230203182839"
|
||||
"updated": "20230211103308"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -762,7 +762,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230202231311-7qdk1za",
|
||||
"updated": "20230202231842"
|
||||
"updated": "20230211103308"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -770,7 +770,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230202231311-n1pf7in",
|
||||
"updated": "20230203182342"
|
||||
"updated": "20230211103308"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -817,7 +817,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230202231321-q1b1tza",
|
||||
"updated": "20230203182839"
|
||||
"updated": "20230211103207"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -825,7 +825,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230202231321-5ugmgf0",
|
||||
"updated": "20230203182839"
|
||||
"updated": "20230211103207"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -916,6 +916,80 @@
|
|||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "20230211102440-0qik4dd",
|
||||
"Type": "NodeListItem",
|
||||
"ListData": {
|
||||
"BulletChar": 42,
|
||||
"Marker": "Kg=="
|
||||
},
|
||||
"Properties": {
|
||||
"id": "20230211102440-0qik4dd",
|
||||
"updated": "20230211102642"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"ID": "20230211102440-09cmf75",
|
||||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230211102440-09cmf75",
|
||||
"updated": "20230211102642"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": "仅支持 png 和 jpg 格式的图片"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "20230211102601-ifl3ojm",
|
||||
"Type": "NodeListItem",
|
||||
"ListData": {
|
||||
"BulletChar": 42,
|
||||
"Marker": "Kg=="
|
||||
},
|
||||
"Properties": {
|
||||
"id": "20230211102601-ifl3ojm",
|
||||
"updated": "20230211102744"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"ID": "20230211102601-npe6hvh",
|
||||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230211102601-npe6hvh",
|
||||
"updated": "20230211102744"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": "默认只对 2MB 以下的图片进行处理,如果需要调整,可以通过环境变量 "
|
||||
},
|
||||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": " 设置,值的单位是字节,比如:"
|
||||
},
|
||||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE=4000000"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": " 将上限调整为 4MB"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
"icon": "1f4d4",
|
||||
"closed": false,
|
||||
"refCreateSavePath": "",
|
||||
"createDocNameTemplate": "",
|
||||
"docCreateSavePath": "",
|
||||
"dailyNoteSavePath": "/daily note/{{now | date \"2006/01\"}}/{{now | date \"2006-01-02\"}}",
|
||||
"dailyNoteTemplatePath": ""
|
||||
"dailyNoteTemplatePath": "",
|
||||
"sortMode": 15
|
||||
}
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
"Properties": {
|
||||
"id": "20211226123038-4umgpxy",
|
||||
"title": "資料文件",
|
||||
"updated": "20230203183310"
|
||||
"updated": "20230211103259"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -729,7 +729,7 @@
|
|||
"ListData": {},
|
||||
"Properties": {
|
||||
"id": "20230202231516-o6k9mj1",
|
||||
"updated": "20230203183310"
|
||||
"updated": "20230211103259"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -741,7 +741,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230202231516-pwj2ndg",
|
||||
"updated": "20230203183210"
|
||||
"updated": "20230211103259"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -749,7 +749,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230202231516-8trf08t",
|
||||
"updated": "20230203183210"
|
||||
"updated": "20230211103259"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -796,7 +796,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230202231519-x47s7he",
|
||||
"updated": "20230203183310"
|
||||
"updated": "20230211103203"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -804,7 +804,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230202231519-04f6dh6",
|
||||
"updated": "20230203183310"
|
||||
"updated": "20230211103203"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -895,6 +895,79 @@
|
|||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "20230211102853-w8ykvqx",
|
||||
"Type": "NodeListItem",
|
||||
"ListData": {
|
||||
"BulletChar": 42,
|
||||
"Marker": "Kg=="
|
||||
},
|
||||
"Properties": {
|
||||
"id": "20230211102853-w8ykvqx"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"ID": "20230211102853-4lc4az0",
|
||||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230211102853-4lc4az0",
|
||||
"updated": "20230211102856"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": "僅支持 png 和 jpg 格式的圖片"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "20230211102858-0lgz6pc",
|
||||
"Type": "NodeListItem",
|
||||
"ListData": {
|
||||
"BulletChar": 42,
|
||||
"Marker": "Kg=="
|
||||
},
|
||||
"Properties": {
|
||||
"id": "20230211102858-0lgz6pc",
|
||||
"updated": "20230211102910"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"ID": "20230211102858-mfhvy7x",
|
||||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230211102858-mfhvy7x",
|
||||
"updated": "20230211102910"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": "默認只對 2MB 以下的圖片進行處理,如果需要調整,可以通過環境變量 "
|
||||
},
|
||||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": " 設置,值的單位是字節,比如:"
|
||||
},
|
||||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "code",
|
||||
"TextMarkTextContent": "SIYUAN_TESSERACT_MAX_SIZE=4000000"
|
||||
},
|
||||
{
|
||||
"Type": "NodeText",
|
||||
"Data": " 將上限調整為 4MB"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -23,17 +23,20 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/88250/gulu"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/siyuan-note/logging"
|
||||
)
|
||||
|
||||
var (
|
||||
TesseractBin = "tesseract"
|
||||
TesseractEnabled bool
|
||||
TesseractMaxSize = 2 * 1000 * uint64(1000)
|
||||
AssetsTexts = map[string]string{}
|
||||
AssetsTextsLock = sync.Mutex{}
|
||||
AssetsTextsChanged = false
|
||||
|
|
@ -85,6 +88,10 @@ func Tesseract(imgAbsPath string) string {
|
|||
return ""
|
||||
}
|
||||
|
||||
if TesseractMaxSize < uint64(info.Size()) {
|
||||
return ""
|
||||
}
|
||||
|
||||
defer logging.Recover()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 7*time.Second)
|
||||
|
|
@ -124,8 +131,15 @@ func initTesseract() {
|
|||
return
|
||||
}
|
||||
|
||||
maxSizeVal := os.Getenv("SIYUAN_TESSERACT_MAX_SIZE")
|
||||
if "" != maxSizeVal {
|
||||
if maxSize, parseErr := strconv.ParseUint(maxSizeVal, 10, 64); nil == parseErr {
|
||||
TesseractMaxSize = maxSize
|
||||
}
|
||||
}
|
||||
|
||||
TesseractLangs = filterTesseractLangs(langs)
|
||||
logging.LogInfof("tesseract-ocr enabled [ver=%s, langs=%s]", ver, strings.Join(TesseractLangs, "+"))
|
||||
logging.LogInfof("tesseract-ocr enabled [ver=%s, maxSize=%s, langs=%s]", ver, humanize.Bytes(TesseractMaxSize), strings.Join(TesseractLangs, "+"))
|
||||
}
|
||||
|
||||
func filterTesseractLangs(langs []string) (ret []string) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue