From d06b37007782e456f3a8d0c97c8681f79db30d23 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Sat, 22 Nov 2025 10:55:08 +0800 Subject: [PATCH] :art: Image OCR supports more formats https://github.com/siyuan-note/siyuan/issues/16418 Signed-off-by: Daniel <845765@qq.com> --- kernel/util/ocr.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go index a9370071e..040468b54 100644 --- a/kernel/util/ocr.go +++ b/kernel/util/ocr.go @@ -182,9 +182,29 @@ func RemoveAssetText(asset string) { assetsTextsChanged.Store(true) } +var tesseractExts = []string{ + ".png", + ".jpg", + ".jpeg", + ".tif", + ".tiff", + ".bmp", + ".gif", + ".webp", + ".pbm", + ".pgm", + ".ppm", + ".pnm", +} + func IsTesseractExtractable(p string) bool { lowerName := strings.ToLower(p) - return strings.HasSuffix(lowerName, ".png") || strings.HasSuffix(lowerName, ".jpg") || strings.HasSuffix(lowerName, ".jpeg") + for _, ext := range tesseractExts { + if strings.HasSuffix(lowerName, ext) { + return true + } + } + return false } // tesseractOCRLock 用于 Tesseract OCR 加锁串行执行提升稳定性 https://github.com/siyuan-note/siyuan/issues/7265 @@ -300,7 +320,7 @@ func InitTesseract() { langs := getTesseractLangs() if 1 > len(langs) { - logging.LogWarnf("no tesseract langs found") + logging.LogWarnf("no tesseract langs found, disabling tesseract-ocr") TesseractEnabled = false tesseractInited.Store(true) return