From 17b598b03334a01fbb95814ca64214f97f1b969f Mon Sep 17 00:00:00 2001
From: Daniel <845765@qq.com>
Date: Wed, 25 Oct 2023 09:48:13 +0800
Subject: [PATCH] :art: PDF files larger than 64MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500

---
 kernel/model/asset_content.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go
index b1b931545..cfd356e02 100644
--- a/kernel/model/asset_content.go
+++ b/kernel/model/asset_content.go
@@ -480,6 +480,7 @@ func NewAssetsSearcher() *AssetsSearcher {
 const (
 	TxtAssetContentMaxSize = 1024 * 1024 * 4
 	PDFAssetContentMaxPage = 1024
+	PDFAssetContentMaxSize = 1024 * 1024 * 64
 )
 
 type AssetParseResult struct {
@@ -828,6 +829,12 @@ func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) {
 		return
 	}
 
+	if PDFAssetContentMaxSize < len(pdfData) {
+		// PDF files larger than 64MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500
+		logging.LogWarnf("ignore large PDF asset [%s] with [%s]", absPath, humanize.Bytes(uint64(len(pdfData))))
+		return
+	}
+
 	// next setup worker pool for processing PDF pages
 	pages := make(chan *pdfPage, pc.PageCount)
 	results := make(chan *pdfTextResult, pc.PageCount)