From b578506ea5e0cc70093b78a077e612816217b632 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Sun, 27 Aug 2023 11:09:19 +0800 Subject: [PATCH] :art: PDF files longer than 1024 pages are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9053 --- kernel/model/asset_content.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go index ff5c150ac..426d94d53 100644 --- a/kernel/model/asset_content.go +++ b/kernel/model/asset_content.go @@ -748,7 +748,7 @@ func (parser *PdfAssetParser) getTextPageWorker(id int, instance pdfium.Pdfium, // Parse will parse a PDF document using PDFium webassembly module using a worker pool func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) { - st := time.Now() + now := time.Now() if !strings.HasSuffix(strings.ToLower(absPath), ".pdf") { return } @@ -850,7 +850,10 @@ func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) { } } close(results) - logging.LogInfof("convert [%s] PDF with %d pages using %d workers took %s.\n", tmp, pc.PageCount, cores, time.Since(st)) + + if 256 < pc.PageCount { + logging.LogInfof("convert [%s] PDF with [%d] pages using [%d] workers took [%s]", absPath, pc.PageCount, cores, time.Since(now)) + } // loop through ordered PDF text pages and join content for asset parse DB result content := ""