From 17b598b03334a01fbb95814ca64214f97f1b969f Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Wed, 25 Oct 2023 09:48:13 +0800 Subject: [PATCH 1/3] :art: PDF files larger than 64MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500 --- kernel/model/asset_content.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go index b1b931545..cfd356e02 100644 --- a/kernel/model/asset_content.go +++ b/kernel/model/asset_content.go @@ -480,6 +480,7 @@ func NewAssetsSearcher() *AssetsSearcher { const ( TxtAssetContentMaxSize = 1024 * 1024 * 4 PDFAssetContentMaxPage = 1024 + PDFAssetContentMaxSize = 1024 * 1024 * 64 ) type AssetParseResult struct { @@ -828,6 +829,12 @@ func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) { return } + if PDFAssetContentMaxSize < len(pdfData) { + // PDF files larger than 64MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500 + logging.LogWarnf("ignore large PDF asset [%s] with [%s]", absPath, humanize.Bytes(uint64(len(pdfData)))) + return + } + // next setup worker pool for processing PDF pages pages := make(chan *pdfPage, pc.PageCount) results := make(chan *pdfTextResult, pc.PageCount) From 28db71aaf44b0dfe2061b05677088d6036bb9c73 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Wed, 25 Oct 2023 09:54:12 +0800 Subject: [PATCH 2/3] :art: PDF files larger than 64MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500 --- .../20230805232134-3d6mx2k.sy | 72 ++++++++++++++++--- .../20230805230218-aea8icj.sy | 72 ++++++++++++++++--- .../20230805232920-5fdco36.sy | 65 +++++++++++++++-- 3 files changed, 186 insertions(+), 23 deletions(-) diff --git a/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy b/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy index 7553696b4..6c42d38ad 100644 --- a/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy +++ b/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20230805232134-3d6mx2k", "title": "Search asset content", - "updated": "20230903112423" + "updated": "20231025095300" }, "Children": [ { @@ -205,7 +205,7 @@ }, { "Type": "NodeText", - "Data": "​:" + "Data": "​​:" } ] }, @@ -215,27 +215,81 @@ "ListData": {}, "Properties": { "id": "20230903112419-45ps71g", - "updated": "20230903112423" + "updated": "20231025095300" }, "Children": [ { - "ID": "20230903112419-t7iom3o", + "ID": "20231025095236-xu5tp9f", "Type": "NodeListItem", "ListData": { "BulletChar": 42, "Marker": "Kg==" }, "Properties": { - "id": "20230903112419-t7iom3o", - "updated": "20230903112423" + "id": "20231025095236-xu5tp9f" }, "Children": [ { - "ID": "20230903112419-8gd73ed", + "ID": "20231025095236-18gmg9h", "Type": "NodeParagraph", "Properties": { - "id": "20230903112419-8gd73ed", - "updated": "20230903112423" + "id": "20231025095236-18gmg9h", + "updated": "20231025095237" + }, + "Children": [ + { + "Type": "NodeText", + "Data": "Text files larger than 4MB are not supported" + } + ] + } + ] + }, + { + "ID": "20231025095244-lpf6spo", + "Type": "NodeListItem", + "ListData": { + "BulletChar": 42, + "Marker": "Kg==" + }, + "Properties": { + "id": "20231025095244-lpf6spo" + }, + "Children": [ + { + "ID": "20231025095244-m8ut4h2", + "Type": "NodeParagraph", + "Properties": { + "id": "20231025095244-m8ut4h2", + "updated": "20231025095244" + }, + "Children": [ + { + "Type": "NodeText", + "Data": "PDFs larger than 64MB or 1024 pages are not supported" + } + ] + } + ] + }, + { + "ID": "20231025095259-wrlui2y", + "Type": "NodeListItem", + "ListData": { + "BulletChar": 42, + "Marker": "Kg==" + }, + "Properties": { + "id": "20231025095259-wrlui2y", + "updated": "20231025095300" + }, + "Children": [ + { + "ID": "20231025095259-cm641sa", + "Type": "NodeParagraph", + "Properties": { + "id": "20231025095259-cm641sa", + "updated": "20231025095300" }, "Children": [ { diff --git a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy index 14790bb10..9d754c1b4 100644 --- a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy +++ b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20230805230218-aea8icj", "title": "搜索资源文件内容", - "updated": "20230903112313" + "updated": "20231025095306" }, "Children": [ { @@ -207,7 +207,7 @@ }, { "Type": "NodeText", - "Data": "​:" + "Data": "​​:" } ] }, @@ -217,27 +217,81 @@ "ListData": {}, "Properties": { "id": "20230903112243-pwm33kc", - "updated": "20230903112313" + "updated": "20231025095306" }, "Children": [ { - "ID": "20230903112244-2ye7sed", + "ID": "20231025095048-l224arr", "Type": "NodeListItem", "ListData": { "BulletChar": 42, "Marker": "Kg==" }, "Properties": { - "id": "20230903112244-2ye7sed", - "updated": "20230903112313" + "id": "20231025095048-l224arr" }, "Children": [ { - "ID": "20230903112244-ux44rel", + "ID": "20231025095048-u6ge0ue", "Type": "NodeParagraph", "Properties": { - "id": "20230903112244-ux44rel", - "updated": "20230903112313" + "id": "20231025095048-u6ge0ue", + "updated": "20231025095053" + }, + "Children": [ + { + "Type": "NodeText", + "Data": "不支持大于 4MB 的文本文件" + } + ] + } + ] + }, + { + "ID": "20231025095053-a0quwv1", + "Type": "NodeListItem", + "ListData": { + "BulletChar": 42, + "Marker": "Kg==" + }, + "Properties": { + "id": "20231025095053-a0quwv1" + }, + "Children": [ + { + "ID": "20231025095053-ee3etgb", + "Type": "NodeParagraph", + "Properties": { + "id": "20231025095053-ee3etgb", + "updated": "20231025095107" + }, + "Children": [ + { + "Type": "NodeText", + "Data": "不支持大于 64MB 或 1024 页的 PDF" + } + ] + } + ] + }, + { + "ID": "20231025095306-wro7c1x", + "Type": "NodeListItem", + "ListData": { + "BulletChar": 42, + "Marker": "Kg==" + }, + "Properties": { + "id": "20231025095306-wro7c1x", + "updated": "20231025095306" + }, + "Children": [ + { + "ID": "20231025095306-6gcc6ba", + "Type": "NodeParagraph", + "Properties": { + "id": "20231025095306-6gcc6ba", + "updated": "20231025095306" }, "Children": [ { diff --git a/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy b/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy index ddd1d06bd..c6a758149 100644 --- a/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy +++ b/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20230805232920-5fdco36", "title": "搜索資源文件內容", - "updated": "20230903112449" + "updated": "20231025095336" }, "Children": [ { @@ -220,9 +220,64 @@ "ListData": {}, "Properties": { "id": "20230903112449-mq7jbfe", - "updated": "20230903112449" + "updated": "20231025095336" }, "Children": [ + { + "ID": "20231025095311-dviybbz", + "Type": "NodeListItem", + "ListData": { + "BulletChar": 42, + "Marker": "Kg==" + }, + "Properties": { + "id": "20231025095311-dviybbz" + }, + "Children": [ + { + "ID": "20231025095311-uri9j00", + "Type": "NodeParagraph", + "Properties": { + "id": "20231025095311-uri9j00", + "updated": "20231025095316" + }, + "Children": [ + { + "Type": "NodeText", + "Data": "不支援大於 4MB 的文字文件" + } + ] + } + ] + }, + { + "ID": "20231025095319-kupw6kw", + "Type": "NodeListItem", + "ListData": { + "BulletChar": 42, + "Marker": "Kg==" + }, + "Properties": { + "id": "20231025095319-kupw6kw", + "updated": "20231025095323" + }, + "Children": [ + { + "ID": "20231025095319-egfy5yi", + "Type": "NodeParagraph", + "Properties": { + "id": "20231025095319-egfy5yi", + "updated": "20231025095323" + }, + "Children": [ + { + "Type": "NodeText", + "Data": "不支援大於 64MB 或 1024 頁的 PDF" + } + ] + } + ] + }, { "ID": "20230903112449-ihee8jo", "Type": "NodeListItem", @@ -232,7 +287,7 @@ }, "Properties": { "id": "20230903112449-ihee8jo", - "updated": "20230903112449" + "updated": "20231025095336" }, "Children": [ { @@ -240,12 +295,12 @@ "Type": "NodeParagraph", "Properties": { "id": "20230903112449-v6wir40", - "updated": "20230903112449" + "updated": "20231025095336" }, "Children": [ { "Type": "NodeText", - "Data": "Android/iOS 端不支持 PDF 資源文件內容搜索" + "Data": "Android/iOS 端不支援 PDF 資源檔案內容搜索" } ] } From f7e6f610992da9c410ecaf4fad62b8e5e90f7b62 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Wed, 25 Oct 2023 09:56:27 +0800 Subject: [PATCH 3/3] :art: PDF files larger than 128MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500 --- .../20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy | 11 ++++++----- .../20230805225107-qm1m2f5/20230805230218-aea8icj.sy | 11 ++++++----- .../20230805232719-04mqbcx/20230805232920-5fdco36.sy | 10 +++++----- kernel/model/asset_content.go | 4 ++-- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy b/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy index 6c42d38ad..329d9169b 100644 --- a/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy +++ b/app/guide/20210808180117-6v0mkxr/20200923234011-ieuun1p/20230805231614-vqn28eh/20230805231816-h1z9mpc/20230805232134-3d6mx2k.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20230805232134-3d6mx2k", "title": "Search asset content", - "updated": "20231025095300" + "updated": "20231025095559" }, "Children": [ { @@ -215,7 +215,7 @@ "ListData": {}, "Properties": { "id": "20230903112419-45ps71g", - "updated": "20231025095300" + "updated": "20231025095559" }, "Children": [ { @@ -253,7 +253,8 @@ "Marker": "Kg==" }, "Properties": { - "id": "20231025095244-lpf6spo" + "id": "20231025095244-lpf6spo", + "updated": "20231025095559" }, "Children": [ { @@ -261,12 +262,12 @@ "Type": "NodeParagraph", "Properties": { "id": "20231025095244-m8ut4h2", - "updated": "20231025095244" + "updated": "20231025095559" }, "Children": [ { "Type": "NodeText", - "Data": "PDFs larger than 64MB or 1024 pages are not supported" + "Data": "PDFs larger than 128MB or 1024 pages are not supported" } ] } diff --git a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy index 9d754c1b4..951fd81bc 100644 --- a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy +++ b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk/20230805225107-qm1m2f5/20230805230218-aea8icj.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20230805230218-aea8icj", "title": "搜索资源文件内容", - "updated": "20231025095306" + "updated": "20231025095556" }, "Children": [ { @@ -217,7 +217,7 @@ "ListData": {}, "Properties": { "id": "20230903112243-pwm33kc", - "updated": "20231025095306" + "updated": "20231025095556" }, "Children": [ { @@ -255,7 +255,8 @@ "Marker": "Kg==" }, "Properties": { - "id": "20231025095053-a0quwv1" + "id": "20231025095053-a0quwv1", + "updated": "20231025095556" }, "Children": [ { @@ -263,12 +264,12 @@ "Type": "NodeParagraph", "Properties": { "id": "20231025095053-ee3etgb", - "updated": "20231025095107" + "updated": "20231025095556" }, "Children": [ { "Type": "NodeText", - "Data": "不支持大于 64MB 或 1024 页的 PDF" + "Data": "不支持大于 128MB 或 1024 页的 PDF" } ] } diff --git a/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy b/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy index c6a758149..a3c3a6dc5 100644 --- a/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy +++ b/app/guide/20211226090932-5lcq56f/20211226115423-d5z1joq/20230805232636-zh0adz2/20230805232719-04mqbcx/20230805232920-5fdco36.sy @@ -5,7 +5,7 @@ "Properties": { "id": "20230805232920-5fdco36", "title": "搜索資源文件內容", - "updated": "20231025095336" + "updated": "20231025095553" }, "Children": [ { @@ -220,7 +220,7 @@ "ListData": {}, "Properties": { "id": "20230903112449-mq7jbfe", - "updated": "20231025095336" + "updated": "20231025095553" }, "Children": [ { @@ -259,7 +259,7 @@ }, "Properties": { "id": "20231025095319-kupw6kw", - "updated": "20231025095323" + "updated": "20231025095553" }, "Children": [ { @@ -267,12 +267,12 @@ "Type": "NodeParagraph", "Properties": { "id": "20231025095319-egfy5yi", - "updated": "20231025095323" + "updated": "20231025095553" }, "Children": [ { "Type": "NodeText", - "Data": "不支援大於 64MB 或 1024 頁的 PDF" + "Data": "不支援大於 128MB 或 1024 頁的 PDF" } ] } diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go index cfd356e02..1da770740 100644 --- a/kernel/model/asset_content.go +++ b/kernel/model/asset_content.go @@ -480,7 +480,7 @@ func NewAssetsSearcher() *AssetsSearcher { const ( TxtAssetContentMaxSize = 1024 * 1024 * 4 PDFAssetContentMaxPage = 1024 - PDFAssetContentMaxSize = 1024 * 1024 * 64 + PDFAssetContentMaxSize = 1024 * 1024 * 128 ) type AssetParseResult struct { @@ -830,7 +830,7 @@ func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) { } if PDFAssetContentMaxSize < len(pdfData) { - // PDF files larger than 64MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500 + // PDF files larger than 128MB are not included in asset file content searching https://github.com/siyuan-note/siyuan/issues/9500 logging.LogWarnf("ignore large PDF asset [%s] with [%s]", absPath, humanize.Bytes(uint64(len(pdfData)))) return }