diff --git a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk.sy b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk.sy index 49bde2fee..1b873ed91 100644 --- a/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk.sy +++ b/app/guide/20210808180117-czj9bvb/20200812220555-lj3enxa/20230805222417-2lj3dvk.sy @@ -6,7 +6,7 @@ "icon": "1f451", "id": "20230805222417-2lj3dvk", "title": "会员特权", - "updated": "20230805225639" + "updated": "20230817114518" }, "Children": [ { @@ -29,7 +29,7 @@ "ListData": {}, "Properties": { "id": "20230805225040-mq394af", - "updated": "20230805225232" + "updated": "20230817114518" }, "Children": [ { @@ -41,7 +41,7 @@ }, "Properties": { "id": "20230805225041-vbfv9l9", - "updated": "20230805225232" + "updated": "20230817114518" }, "Children": [ { @@ -49,7 +49,7 @@ "Type": "NodeParagraph", "Properties": { "id": "20230805225041-vcse7tk", - "updated": "20230805225232" + "updated": "20230817114518" }, "Children": [ { @@ -59,7 +59,7 @@ { "Type": "NodeTextMark", "TextMarkType": "block-ref", - "TextMarkBlockRefID": "20210808180320-m0ztypq", + "TextMarkBlockRefID": "20230808120348-orm8sjf", "TextMarkBlockRefSubtype": "s", "TextMarkTextContent": "云端服务" } diff --git a/app/src/util/assets.ts b/app/src/util/assets.ts index ccc4be403..07ab3039e 100644 --- a/app/src/util/assets.ts +++ b/app/src/util/assets.ts @@ -173,6 +173,7 @@ export const addGA = () => { subscriptionStatus: -1, subscriptionPlan: -1, subscriptionType: -1, + oneTimePayStatus: -1, syncEnabled: false, syncProvider: -1, cTreeCount: window.siyuan.config.stat.cTreeCount, @@ -185,6 +186,7 @@ export const addGA = () => { para.subscriptionStatus = window.siyuan.user.userSiYuanSubscriptionStatus; para.subscriptionPlan = window.siyuan.user.userSiYuanSubscriptionPlan; para.subscriptionType = window.siyuan.user.userSiYuanSubscriptionType; + para.oneTimePayStatus = window.siyuan.user.userSiYuanOneTimePayStatus; } if (window.siyuan.config.sync) { para.syncEnabled = window.siyuan.config.sync.enabled; diff --git a/kernel/go.mod b/kernel/go.mod index 2f0a4e97d..3c10518e6 100644 --- a/kernel/go.mod +++ b/kernel/go.mod @@ -36,6 +36,7 @@ require ( github.com/imroc/req/v3 v3.41.4 github.com/jinzhu/copier v0.3.5 github.com/json-iterator/go v1.1.12 + github.com/klippa-app/go-pdfium v1.6.0 github.com/mattn/go-sqlite3 v2.0.3+incompatible github.com/mitchellh/go-ps v1.0.0 github.com/mssola/useragent v1.0.0 @@ -108,6 +109,7 @@ require ( github.com/imdario/mergo v0.3.16 // indirect github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/jolestar/go-commons-pool/v2 v2.1.2 // indirect github.com/juju/errors v1.0.0 // indirect github.com/klauspost/compress v1.16.7 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect @@ -141,6 +143,7 @@ require ( github.com/shopspring/decimal v1.3.1 // indirect github.com/spf13/cast v1.5.1 // indirect github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect + github.com/tetratelabs/wazero v1.3.1 // indirect github.com/tklauser/go-sysconf v0.3.11 // indirect github.com/tklauser/numcpus v0.6.1 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect diff --git a/kernel/go.sum b/kernel/go.sum index 9fd28e9dd..81202f3c6 100644 --- a/kernel/go.sum +++ b/kernel/go.sum @@ -173,6 +173,8 @@ github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA= github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= github.com/flopp/go-findfont v0.1.0 h1:lPn0BymDUtJo+ZkV01VS3661HL6F4qFlkhcJN55u6mU= github.com/flopp/go-findfont v0.1.0/go.mod h1:wKKxRDjD024Rh7VMwoU90i6ikQRCr+JTHB5n4Ejkqvw= +github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= @@ -354,6 +356,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/jolestar/go-commons-pool/v2 v2.1.2 h1:E+XGo58F23t7HtZiC/W6jzO2Ux2IccSH/yx4nD+J1CM= +github.com/jolestar/go-commons-pool/v2 v2.1.2/go.mod h1:r4NYccrkS5UqP1YQI1COyTZ9UjPJAAGTUxzcsK1kqhY= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= @@ -373,6 +377,8 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klippa-app/go-pdfium v1.6.0 h1:swz+bKYsrRSuPrczot2cE/FoR/1h13R8CjBOv2RcDm4= +github.com/klippa-app/go-pdfium v1.6.0/go.mod h1:Lh8U8bQ+Idxz3e89+0u59j64YTPaO3G5JbvRImVqIio= github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -423,7 +429,7 @@ github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= -github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= +github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= github.com/open-spaced-repetition/go-fsrs v1.0.0 h1:FIH5MW29EEHTTLwXPeDWJqO4QA/b7L9i2GEH4lAEl0U= github.com/open-spaced-repetition/go-fsrs v1.0.0/go.mod h1:WpbNs4TTKZChOHFO+ME0B9femUVZsepFT5mhAioszRg= github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE= @@ -538,6 +544,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/studio-b12/gowebdav v0.9.0 h1:1j1sc9gQnNxbXXM4M/CebPOX4aXYtr7MojAVcN4dHjU= github.com/studio-b12/gowebdav v0.9.0/go.mod h1:bHA7t77X/QFExdeAnDzK6vKM34kEZAcE1OX4MfiwjkE= +github.com/tetratelabs/wazero v1.3.1 h1:rnb9FgOEQRLLR8tgoD1mfjNjMhFeWRUk+a4b4j/GpUM= +github.com/tetratelabs/wazero v1.3.1/go.mod h1:wYx2gNRg8/WihJfSDxA1TIL8H+GkfLYm+bIfbblu9VQ= github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go index 622139168..a31163f27 100644 --- a/kernel/model/asset_content.go +++ b/kernel/model/asset_content.go @@ -24,11 +24,14 @@ import ( "strconv" "strings" "sync" + "time" "code.sajari.com/docconv" "github.com/88250/gulu" "github.com/88250/lute/ast" "github.com/dustin/go-humanize" + "github.com/klippa-app/go-pdfium/requests" + "github.com/klippa-app/go-pdfium/webassembly" "github.com/siyuan-note/eventbus" "github.com/siyuan-note/filelock" "github.com/siyuan-note/logging" @@ -287,8 +290,8 @@ func IndexAssetContent(absPath string) { assetsDir := util.GetDataAssetsAbsPath() ext := strings.ToLower(filepath.Ext(absPath)) - parser, found := assetContentSearcher.Parsers[ext] - if !found { + parser := assetContentSearcher.GetParser(ext) + if nil == parser { return } @@ -349,9 +352,15 @@ var ( ) type AssetsSearcher struct { - Parsers map[string]AssetParser + parsers map[string]AssetParser + lock *sync.Mutex +} - lock *sync.Mutex +func (searcher *AssetsSearcher) GetParser(ext string) AssetParser { + searcher.lock.Lock() + defer searcher.lock.Unlock() + + return searcher.parsers[ext] } func (searcher *AssetsSearcher) FullIndex() { @@ -374,8 +383,8 @@ func (searcher *AssetsSearcher) FullIndex() { } ext := strings.ToLower(filepath.Ext(absPath)) - parser, found := searcher.Parsers[ext] - if !found { + parser := searcher.GetParser(ext) + if nil == parser { return nil } @@ -408,14 +417,43 @@ func (searcher *AssetsSearcher) FullIndex() { } func NewAssetsSearcher() *AssetsSearcher { + txtAssetParser := &TxtAssetParser{} return &AssetsSearcher{ - Parsers: map[string]AssetParser{ - ".txt": &TxtAssetParser{}, - ".md": &TxtAssetParser{}, - ".markdown": &TxtAssetParser{}, + parsers: map[string]AssetParser{ + ".txt": txtAssetParser, + ".md": txtAssetParser, + ".markdown": txtAssetParser, + ".json": txtAssetParser, + ".log": txtAssetParser, + ".sql": txtAssetParser, + ".html": txtAssetParser, + ".xml": txtAssetParser, + ".java": txtAssetParser, + ".h": txtAssetParser, + ".c": txtAssetParser, + ".cpp": txtAssetParser, + ".go": txtAssetParser, + ".swift": txtAssetParser, + ".kt": txtAssetParser, + ".py": txtAssetParser, + ".js": txtAssetParser, + ".css": txtAssetParser, + ".ts": txtAssetParser, + ".sh": txtAssetParser, + ".bat": txtAssetParser, + ".cmd": txtAssetParser, + ".ini": txtAssetParser, + ".yaml": txtAssetParser, + ".rst": txtAssetParser, + ".adoc": txtAssetParser, + ".textile": txtAssetParser, + ".opml": txtAssetParser, + ".org": txtAssetParser, + ".wiki": txtAssetParser, ".docx": &DocxAssetParser{}, ".pptx": &PptxAssetParser{}, ".xlsx": &XlsxAssetParser{}, + ".pdf": &PdfAssetParser{}, }, lock: &sync.Mutex{}, @@ -604,3 +642,98 @@ func (parser *XlsxAssetParser) Parse(absPath string) (ret *AssetParseResult) { } return } + +// PdfAssetParser parser factory product +type PdfAssetParser struct { +} + +// Parse will parse a PDF document using PDFium webassembly module +func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) { + if !strings.HasSuffix(strings.ToLower(absPath), ".pdf") { + return + } + + if !gulu.File.IsExist(absPath) { + return + } + + tmp := copyTempAsset(absPath) + if "" == tmp { + return + } + defer os.RemoveAll(tmp) + + f, err := os.Open(tmp) + if nil != err { + logging.LogErrorf("open [%s] failed: [%s]", tmp, err) + return + } + defer f.Close() + + stat, err := f.Stat() + if nil != err { + logging.LogErrorf("open [%s] failed: [%s]", tmp, err) + return + } + + // initialize pdfium with one worker + pool, err := webassembly.Init(webassembly.Config{ + MinIdle: 1, + MaxIdle: 1, + MaxTotal: 1, + }) + if err != nil { + logging.LogErrorf("convert [%s] failed: [%s]", tmp, err) + return + } + defer pool.Close() + + instance, err := pool.GetInstance(time.Second * 30) + if err != nil { + logging.LogErrorf("convert [%s] failed: [%s]", tmp, err) + return + } + defer instance.Close() + + // get number of pages inside PDF document + doc, err := instance.OpenDocument(&requests.OpenDocument{ + FileReader: f, + FileReaderSize: stat.Size(), + }) + if err != nil { + logging.LogErrorf("convert [%s] failed: [%s]", tmp, err) + return + } + defer instance.FPDF_CloseDocument(&requests.FPDF_CloseDocument{ + Document: doc.Document, + }) + + pageCount, err := instance.FPDF_GetPageCount(&requests.FPDF_GetPageCount{Document: doc.Document}) + if err != nil { + logging.LogErrorf("convert [%s] failed: [%s]", tmp, err) + return + } + // loop through pages and get content + content := "" + for page := 0; page < pageCount.PageCount; page++ { + req := &requests.GetPageText{ + Page: requests.Page{ + ByIndex: &requests.PageByIndex{ + Document: doc.Document, + Index: page, + }, + }, + } + pt, err := instance.GetPageText(req) + if err != nil { + logging.LogErrorf("convert [%s] failed: [%s]", tmp, err) + return + } + content += " " + normalizeAssetContent(pt.Text) + } + + ret = &AssetParseResult{ + Content: content, + } + return +} diff --git a/kernel/model/asset_content_test.go b/kernel/model/asset_content_test.go new file mode 100644 index 000000000..bf6794e9d --- /dev/null +++ b/kernel/model/asset_content_test.go @@ -0,0 +1,29 @@ +// SiYuan - Refactor your thinking +// Copyright (c) 2020-present, b3log.org +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package model + +import ( + "testing" +) + +func TestPDFParser(t *testing.T) { + p := &PdfAssetParser{} + res := p.Parse("../testdata/parsertest.pdf") + if res == nil || res.Content == "" { + t.Fatalf("empty or nil PDF content result") + } +} diff --git a/kernel/testdata/parsertest.pdf b/kernel/testdata/parsertest.pdf new file mode 100644 index 000000000..1d731130f Binary files /dev/null and b/kernel/testdata/parsertest.pdf differ