diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go index c5ac3c7d9..ff5c150ac 100644 --- a/kernel/model/asset_content.go +++ b/kernel/model/asset_content.go @@ -519,6 +519,7 @@ func (parser *TxtAssetParser) Parse(absPath string) (ret *AssetParseResult) { if !utf8.Valid(data) { // Non-UTF-8 encoded text files are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9052 + logging.LogWarnf("asset [%s] is not UTF-8 encoded", absPath) return } @@ -808,6 +809,12 @@ func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) { } instance.Close() + if 1024 < pc.PageCount { + // PDF files longer than 1024 pages are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9053 + logging.LogWarnf("ignore large PDF asset [%s] with [%d] pages", absPath, pc.PageCount) + return + } + // next setup worker pool for processing PDF pages pages := make(chan *pdfPage, pc.PageCount) results := make(chan *pdfTextResult, pc.PageCount)