From c50f231d8106726dccafbf87adf7b7940db8e25b Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Mon, 16 Jan 2023 21:00:52 +0800 Subject: [PATCH 1/2] =?UTF-8?q?:art:=20=E6=A1=8C=E9=9D=A2=E7=AB=AF?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=90=9C=E7=B4=A2=E5=9B=BE=E7=89=87=20OCR=20?= =?UTF-8?q?=E6=96=87=E6=9C=AC=20https://github.com/siyuan-note/siyuan/issu?= =?UTF-8?q?es/3470?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/go.mod | 8 ++++++-- kernel/go.sum | 11 +++++++++++ kernel/util/ocr.go | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/kernel/go.mod b/kernel/go.mod index 309d35377..f0c963057 100644 --- a/kernel/go.mod +++ b/kernel/go.mod @@ -28,7 +28,7 @@ require ( github.com/gin-contrib/sessions v0.0.5 github.com/gin-gonic/gin v1.8.2 github.com/gofrs/flock v0.8.1 - github.com/imroc/req/v3 v3.29.0 + github.com/imroc/req/v3 v3.30.0 github.com/jinzhu/copier v0.3.5 github.com/json-iterator/go v1.1.12 github.com/mattn/go-sqlite3 v2.0.3+incompatible @@ -43,7 +43,7 @@ require ( github.com/siyuan-note/encryption v0.0.0-20220713091850-5ecd92177b75 github.com/siyuan-note/eventbus v0.0.0-20220916025349-3ac6e75522da github.com/siyuan-note/filelock v0.0.0-20221117095924-e1947438a35e - github.com/siyuan-note/httpclient v0.0.0-20230107020227-d12d9c7d13e5 + github.com/siyuan-note/httpclient v0.0.0-20230116125720-ee36ddf6f223 github.com/siyuan-note/logging v0.0.0-20221031125421-9b7234d79d8a github.com/siyuan-note/riff v0.0.0-20221228031102-17d458a1217b github.com/steambap/captcha v1.4.1 @@ -64,6 +64,7 @@ require ( github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect github.com/aws/aws-sdk-go v1.44.180 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cheekybits/genny v1.0.0 // indirect github.com/dlclark/regexp2 v1.8.0 // indirect github.com/dsnet/compress v0.0.1 // indirect github.com/gin-contrib/sse v0.1.0 // indirect @@ -105,6 +106,8 @@ require ( github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/nxadm/tail v1.4.8 // indirect + github.com/onsi/ginkgo v1.16.5 // indirect github.com/onsi/ginkgo/v2 v2.7.0 // indirect github.com/open-spaced-repetition/go-fsrs v0.1.0 // indirect github.com/pelletier/go-toml/v2 v2.0.6 // indirect @@ -128,6 +131,7 @@ require ( golang.org/x/sys v0.4.0 // indirect golang.org/x/tools v0.5.0 // indirect google.golang.org/protobuf v1.28.1 // indirect + gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/kernel/go.sum b/kernel/go.sum index f46d82955..0127b0520 100644 --- a/kernel/go.sum +++ b/kernel/go.sum @@ -56,6 +56,7 @@ github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cheekybits/genny v1.0.0 h1:uGGa4nei+j20rOSeDeP5Of12XVm7TGUd4dJA9RDitfE= github.com/cheekybits/genny v1.0.0/go.mod h1:+tQajlRqAUrPI7DOSpB0XAqZYtQakVtB7wXkRAgjxjQ= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be h1:J5BL2kskAlV9ckgEsNQXscjIaLiOYiZ75d4e94E6dcQ= @@ -212,6 +213,8 @@ github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk= github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg= github.com/imroc/req/v3 v3.29.0 h1:QES7vJ7pE6AJWDJtntk81v299PUgcMY0XxcrP4Drmt4= github.com/imroc/req/v3 v3.29.0/go.mod h1:u+HHE8MLYi6SkkfMKb09lvmQdJwUf4wfnVGoEsj8Xtk= +github.com/imroc/req/v3 v3.30.0 h1:4iSXgIQfh/3N7JK9Lt7S0q3n/ZvuGICYwV3iv/MWY1M= +github.com/imroc/req/v3 v3.30.0/go.mod h1:DKtNwSxMdpqZKJ6neBw8VwRioq78uwmQB4ynQEXNNUk= github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU= github.com/jinzhu/copier v0.3.5 h1:GlvfUwHk62RokgqVNvYsku0TATCF7bAHVwEXoBh3iJg= github.com/jinzhu/copier v0.3.5/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg= @@ -261,9 +264,11 @@ github.com/marten-seemann/qtls-go1-16 v0.1.5/go.mod h1:gNpI2Ol+lRS3WwSOtIUUtRwZE github.com/marten-seemann/qtls-go1-17 v0.1.2 h1:JADBlm0LYiVbuSySCHeY863dNkcpMmDR7s0bLKJeYlQ= github.com/marten-seemann/qtls-go1-17 v0.1.2/go.mod h1:C2ekUKcDdz9SDWxec1N/MvcXBpaX9l3Nx67XaR84L5s= github.com/marten-seemann/qtls-go1-18 v0.1.2/go.mod h1:mJttiymBAByA49mhlNZZGrH5u1uXYZJ+RW28Py7f4m4= +github.com/marten-seemann/qtls-go1-18 v0.1.3/go.mod h1:mJttiymBAByA49mhlNZZGrH5u1uXYZJ+RW28Py7f4m4= github.com/marten-seemann/qtls-go1-18 v0.1.4 h1:ogomB+lWV3Vmwiu6RTwDVTMGx+9j7SEi98e8QB35Its= github.com/marten-seemann/qtls-go1-18 v0.1.4/go.mod h1:mJttiymBAByA49mhlNZZGrH5u1uXYZJ+RW28Py7f4m4= github.com/marten-seemann/qtls-go1-19 v0.1.0-beta.1/go.mod h1:5HTDWtVudo/WFsHKRNuOhWlbdjrfs5JHrYb0wIJqGpI= +github.com/marten-seemann/qtls-go1-19 v0.1.1/go.mod h1:5HTDWtVudo/WFsHKRNuOhWlbdjrfs5JHrYb0wIJqGpI= github.com/marten-seemann/qtls-go1-19 v0.1.2 h1:ZevAEqKXH0bZmoOBPiqX2h5rhQ7cbZi+X+rlq2JUbCE= github.com/marten-seemann/qtls-go1-19 v0.1.2/go.mod h1:5HTDWtVudo/WFsHKRNuOhWlbdjrfs5JHrYb0wIJqGpI= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= @@ -390,6 +395,8 @@ github.com/siyuan-note/filelock v0.0.0-20221117095924-e1947438a35e h1:i3RKrdrddr github.com/siyuan-note/filelock v0.0.0-20221117095924-e1947438a35e/go.mod h1:NmpSIVtIGy8eNWapjDIiiCw5+5r5wxC76k40oG+WRXQ= github.com/siyuan-note/httpclient v0.0.0-20230107020227-d12d9c7d13e5 h1:rLMuTLwYLPHGyQdeinVyZdtiHp+aMBTaZxOM40RAJ7s= github.com/siyuan-note/httpclient v0.0.0-20230107020227-d12d9c7d13e5/go.mod h1:tB4/csNIf0gyxXTH+I5rQePH/aZgxXaDlJpdfPf+hg4= +github.com/siyuan-note/httpclient v0.0.0-20230116125720-ee36ddf6f223 h1:hG+gucj92x4Dl4lIe2G0WkPgBdlEBnnQCmYpghHeW54= +github.com/siyuan-note/httpclient v0.0.0-20230116125720-ee36ddf6f223/go.mod h1:tB4/csNIf0gyxXTH+I5rQePH/aZgxXaDlJpdfPf+hg4= github.com/siyuan-note/logging v0.0.0-20221031125421-9b7234d79d8a h1:b9VJCE8IccYjsadwNBI11he+Wn25hI9lCma4uYoIYEM= github.com/siyuan-note/logging v0.0.0-20221031125421-9b7234d79d8a/go.mod h1:t1zRGxK13L/9ZFoGyTD39IbFCbee3CsypDj4b5dt4qM= github.com/siyuan-note/riff v0.0.0-20221228031102-17d458a1217b h1:JDpKOdiyocNsgKFfrF3mB7UoBJz4qcHBUKBig78kVjc= @@ -439,6 +446,7 @@ github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsr github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= @@ -507,6 +515,7 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -554,6 +563,7 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211020174200-9d6173849985/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211023085530-d6a326fbbf70/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -597,6 +607,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.8-0.20211022200916-316ba0b74098/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= +golang.org/x/tools v0.1.11/go.mod h1:SgwaegtQh8clINPpECJMqnxLv9I09HLqnW3RMqW0CA4= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.5.0 h1:+bSpV5HIeWkuvgaMfI3UmKRThoTA5ODJTUd8T17NO+4= golang.org/x/tools v0.5.0/go.mod h1:N+Kgy78s5I24c24dU8OfWNEotWjutIs8SnJvn5IDq+k= diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go index 58811682b..930afa00b 100644 --- a/kernel/util/ocr.go +++ b/kernel/util/ocr.go @@ -38,6 +38,7 @@ import ( var ( tesseractEnabled bool + tesseractLangs []string assetsTexts = map[string]string{} assetsTextsLock = sync.Mutex{} assetsTextsChanged = false @@ -77,7 +78,7 @@ func Tesseract(imgAbsPath string) string { defer cancel() now := time.Now() - cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", "chi_sim+eng") + cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", strings.Join(tesseractLangs, "+")) gulu.CmdAttr(cmd) output, err := cmd.CombinedOutput() if ctx.Err() == context.DeadlineExceeded { @@ -281,6 +282,12 @@ func initTesseract() { return } + tesseractLangs = getTesseractLangs() + if 1 > len(tesseractLangs) { + logging.LogWarnf("no tesseract langs found") + tesseractEnabled = false + return + } logging.LogInfof("tesseract-ocr enabled [ver=%s]", ver) } @@ -303,3 +310,29 @@ func getTesseractVer() (ret string) { } return } + +func getTesseractLangs() (ret []string) { + if !tesseractEnabled { + return nil + } + + cmd := exec.Command("tesseract", "--list-langs") + gulu.CmdAttr(cmd) + data, err := cmd.CombinedOutput() + if nil != err { + return nil + } + + parts := bytes.Split(data, []byte("\n")) + if 0 < len(parts) { + parts = parts[1:] + } + for _, part := range parts { + part = bytes.TrimSpace(part) + if 0 == len(part) { + continue + } + ret = append(ret, string(part)) + } + return +} From f40f9b6b2f9cef17c1a379158b5ab676b50a84e2 Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Mon, 16 Jan 2023 21:03:12 +0800 Subject: [PATCH 2/2] =?UTF-8?q?:art:=20=E6=A1=8C=E9=9D=A2=E7=AB=AF?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=90=9C=E7=B4=A2=E5=9B=BE=E7=89=87=20OCR=20?= =?UTF-8?q?=E6=96=87=E6=9C=AC=20https://github.com/siyuan-note/siyuan/issu?= =?UTF-8?q?es/3470?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/util/ocr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go index 930afa00b..722b39def 100644 --- a/kernel/util/ocr.go +++ b/kernel/util/ocr.go @@ -288,7 +288,7 @@ func initTesseract() { tesseractEnabled = false return } - logging.LogInfof("tesseract-ocr enabled [ver=%s]", ver) + logging.LogInfof("tesseract-ocr enabled [ver=%s, langs=%s]", ver, strings.Join(tesseractLangs, "+")) } func getTesseractVer() (ret string) {