Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
Vanessa 2023-08-17 17:41:29 +08:00
commit e87b831da7
7 changed files with 191 additions and 16 deletions

View file

@ -6,7 +6,7 @@
"icon": "1f451",
"id": "20230805222417-2lj3dvk",
"title": "会员特权",
"updated": "20230805225639"
"updated": "20230817114518"
},
"Children": [
{
@ -29,7 +29,7 @@
"ListData": {},
"Properties": {
"id": "20230805225040-mq394af",
"updated": "20230805225232"
"updated": "20230817114518"
},
"Children": [
{
@ -41,7 +41,7 @@
},
"Properties": {
"id": "20230805225041-vbfv9l9",
"updated": "20230805225232"
"updated": "20230817114518"
},
"Children": [
{
@ -49,7 +49,7 @@
"Type": "NodeParagraph",
"Properties": {
"id": "20230805225041-vcse7tk",
"updated": "20230805225232"
"updated": "20230817114518"
},
"Children": [
{
@ -59,7 +59,7 @@
{
"Type": "NodeTextMark",
"TextMarkType": "block-ref",
"TextMarkBlockRefID": "20210808180320-m0ztypq",
"TextMarkBlockRefID": "20230808120348-orm8sjf",
"TextMarkBlockRefSubtype": "s",
"TextMarkTextContent": "云端服务"
}

View file

@ -173,6 +173,7 @@ export const addGA = () => {
subscriptionStatus: -1,
subscriptionPlan: -1,
subscriptionType: -1,
oneTimePayStatus: -1,
syncEnabled: false,
syncProvider: -1,
cTreeCount: window.siyuan.config.stat.cTreeCount,
@ -185,6 +186,7 @@ export const addGA = () => {
para.subscriptionStatus = window.siyuan.user.userSiYuanSubscriptionStatus;
para.subscriptionPlan = window.siyuan.user.userSiYuanSubscriptionPlan;
para.subscriptionType = window.siyuan.user.userSiYuanSubscriptionType;
para.oneTimePayStatus = window.siyuan.user.userSiYuanOneTimePayStatus;
}
if (window.siyuan.config.sync) {
para.syncEnabled = window.siyuan.config.sync.enabled;

View file

@ -36,6 +36,7 @@ require (
github.com/imroc/req/v3 v3.41.4
github.com/jinzhu/copier v0.3.5
github.com/json-iterator/go v1.1.12
github.com/klippa-app/go-pdfium v1.6.0
github.com/mattn/go-sqlite3 v2.0.3+incompatible
github.com/mitchellh/go-ps v1.0.0
github.com/mssola/useragent v1.0.0
@ -108,6 +109,7 @@ require (
github.com/imdario/mergo v0.3.16 // indirect
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jolestar/go-commons-pool/v2 v2.1.2 // indirect
github.com/juju/errors v1.0.0 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
@ -141,6 +143,7 @@ require (
github.com/shopspring/decimal v1.3.1 // indirect
github.com/spf13/cast v1.5.1 // indirect
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/tetratelabs/wazero v1.3.1 // indirect
github.com/tklauser/go-sysconf v0.3.11 // indirect
github.com/tklauser/numcpus v0.6.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect

View file

@ -173,6 +173,8 @@ github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=
github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=
github.com/flopp/go-findfont v0.1.0 h1:lPn0BymDUtJo+ZkV01VS3661HL6F4qFlkhcJN55u6mU=
github.com/flopp/go-findfont v0.1.0/go.mod h1:wKKxRDjD024Rh7VMwoU90i6ikQRCr+JTHB5n4Ejkqvw=
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
@ -354,6 +356,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/jolestar/go-commons-pool/v2 v2.1.2 h1:E+XGo58F23t7HtZiC/W6jzO2Ux2IccSH/yx4nD+J1CM=
github.com/jolestar/go-commons-pool/v2 v2.1.2/go.mod h1:r4NYccrkS5UqP1YQI1COyTZ9UjPJAAGTUxzcsK1kqhY=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
@ -373,6 +377,8 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/klippa-app/go-pdfium v1.6.0 h1:swz+bKYsrRSuPrczot2cE/FoR/1h13R8CjBOv2RcDm4=
github.com/klippa-app/go-pdfium v1.6.0/go.mod h1:Lh8U8bQ+Idxz3e89+0u59j64YTPaO3G5JbvRImVqIio=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
@ -423,7 +429,7 @@ github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn
github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA=
github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU=
github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM=
github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc=
github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI=
github.com/open-spaced-repetition/go-fsrs v1.0.0 h1:FIH5MW29EEHTTLwXPeDWJqO4QA/b7L9i2GEH4lAEl0U=
github.com/open-spaced-repetition/go-fsrs v1.0.0/go.mod h1:WpbNs4TTKZChOHFO+ME0B9femUVZsepFT5mhAioszRg=
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
@ -538,6 +544,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/studio-b12/gowebdav v0.9.0 h1:1j1sc9gQnNxbXXM4M/CebPOX4aXYtr7MojAVcN4dHjU=
github.com/studio-b12/gowebdav v0.9.0/go.mod h1:bHA7t77X/QFExdeAnDzK6vKM34kEZAcE1OX4MfiwjkE=
github.com/tetratelabs/wazero v1.3.1 h1:rnb9FgOEQRLLR8tgoD1mfjNjMhFeWRUk+a4b4j/GpUM=
github.com/tetratelabs/wazero v1.3.1/go.mod h1:wYx2gNRg8/WihJfSDxA1TIL8H+GkfLYm+bIfbblu9VQ=
github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM=
github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=

View file

@ -24,11 +24,14 @@ import (
"strconv"
"strings"
"sync"
"time"
"code.sajari.com/docconv"
"github.com/88250/gulu"
"github.com/88250/lute/ast"
"github.com/dustin/go-humanize"
"github.com/klippa-app/go-pdfium/requests"
"github.com/klippa-app/go-pdfium/webassembly"
"github.com/siyuan-note/eventbus"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
@ -287,8 +290,8 @@ func IndexAssetContent(absPath string) {
assetsDir := util.GetDataAssetsAbsPath()
ext := strings.ToLower(filepath.Ext(absPath))
parser, found := assetContentSearcher.Parsers[ext]
if !found {
parser := assetContentSearcher.GetParser(ext)
if nil == parser {
return
}
@ -349,9 +352,15 @@ var (
)
type AssetsSearcher struct {
Parsers map[string]AssetParser
parsers map[string]AssetParser
lock *sync.Mutex
}
lock *sync.Mutex
// GetParser looks up the parser registered under ext while holding the
// searcher's lock. Callers in this file pass the lower-cased file extension
// including the leading dot. The result is nil when nothing is registered
// for ext.
func (searcher *AssetsSearcher) GetParser(ext string) AssetParser {
	searcher.lock.Lock()
	registered := searcher.parsers[ext]
	searcher.lock.Unlock()
	return registered
}
func (searcher *AssetsSearcher) FullIndex() {
@ -374,8 +383,8 @@ func (searcher *AssetsSearcher) FullIndex() {
}
ext := strings.ToLower(filepath.Ext(absPath))
parser, found := searcher.Parsers[ext]
if !found {
parser := searcher.GetParser(ext)
if nil == parser {
return nil
}
@ -408,14 +417,43 @@ func (searcher *AssetsSearcher) FullIndex() {
}
func NewAssetsSearcher() *AssetsSearcher {
txtAssetParser := &TxtAssetParser{}
return &AssetsSearcher{
Parsers: map[string]AssetParser{
".txt": &TxtAssetParser{},
".md": &TxtAssetParser{},
".markdown": &TxtAssetParser{},
parsers: map[string]AssetParser{
".txt": txtAssetParser,
".md": txtAssetParser,
".markdown": txtAssetParser,
".json": txtAssetParser,
".log": txtAssetParser,
".sql": txtAssetParser,
".html": txtAssetParser,
".xml": txtAssetParser,
".java": txtAssetParser,
".h": txtAssetParser,
".c": txtAssetParser,
".cpp": txtAssetParser,
".go": txtAssetParser,
".swift": txtAssetParser,
".kt": txtAssetParser,
".py": txtAssetParser,
".js": txtAssetParser,
".css": txtAssetParser,
".ts": txtAssetParser,
".sh": txtAssetParser,
".bat": txtAssetParser,
".cmd": txtAssetParser,
".ini": txtAssetParser,
".yaml": txtAssetParser,
".rst": txtAssetParser,
".adoc": txtAssetParser,
".textile": txtAssetParser,
".opml": txtAssetParser,
".org": txtAssetParser,
".wiki": txtAssetParser,
".docx": &DocxAssetParser{},
".pptx": &PptxAssetParser{},
".xlsx": &XlsxAssetParser{},
".pdf": &PdfAssetParser{},
},
lock: &sync.Mutex{},
@ -604,3 +642,98 @@ func (parser *XlsxAssetParser) Parse(absPath string) (ret *AssetParseResult) {
}
return
}
// PdfAssetParser is the asset parser used for ".pdf" files. It carries no
// state of its own.
type PdfAssetParser struct{}
// Parse extracts the text of the PDF document at absPath using the PDFium
// WebAssembly module.
//
// It returns nil when absPath does not end in ".pdf" (case-insensitive), when
// the file does not exist, when no temporary copy could be obtained, or when
// any step of opening or reading the document fails (those failures are
// logged). On success the result's Content holds the normalized text of every
// page in page order, each page preceded by a single space.
func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) {
	if !strings.HasSuffix(strings.ToLower(absPath), ".pdf") {
		return
	}

	if !gulu.File.IsExist(absPath) {
		return
	}

	// Work on the path handed back by copyTempAsset instead of absPath itself;
	// an empty path means no copy is available.
	tmp := copyTempAsset(absPath)
	if "" == tmp {
		return
	}
	// Deferred calls run in reverse order, so the file opened below is closed
	// before tmp is removed.
	defer os.RemoveAll(tmp)

	f, err := os.Open(tmp)
	if nil != err {
		logging.LogErrorf("open [%s] failed: [%s]", tmp, err)
		return
	}
	defer f.Close()

	// The file size is required by the OpenDocument request below.
	stat, err := f.Stat()
	if nil != err {
		logging.LogErrorf("stat [%s] failed: [%s]", tmp, err)
		return
	}

	// Initialize pdfium with one worker.
	// NOTE(review): a fresh pool is created and closed on every call; this is
	// presumably costly when many PDFs are indexed - consider sharing one pool.
	pool, err := webassembly.Init(webassembly.Config{
		MinIdle:  1,
		MaxIdle:  1,
		MaxTotal: 1,
	})
	if nil != err {
		logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
		return
	}
	defer pool.Close()

	// Wait at most 30 seconds for a worker instance.
	instance, err := pool.GetInstance(time.Second * 30)
	if nil != err {
		logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
		return
	}
	defer instance.Close()

	// Open the document by streaming it from the temporary file.
	doc, err := instance.OpenDocument(&requests.OpenDocument{
		FileReader:     f,
		FileReaderSize: stat.Size(),
	})
	if nil != err {
		logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
		return
	}
	// Best-effort close: the result of closing the document is not inspected.
	defer instance.FPDF_CloseDocument(&requests.FPDF_CloseDocument{
		Document: doc.Document,
	})

	// Get the number of pages inside the PDF document.
	pageCount, err := instance.FPDF_GetPageCount(&requests.FPDF_GetPageCount{Document: doc.Document})
	if nil != err {
		logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
		return
	}

	// Loop through the pages and collect their text. A strings.Builder avoids
	// re-copying the accumulated text on every page.
	var content strings.Builder
	for page := 0; page < pageCount.PageCount; page++ {
		req := &requests.GetPageText{
			Page: requests.Page{
				ByIndex: &requests.PageByIndex{
					Document: doc.Document,
					Index:    page,
				},
			},
		}
		pt, err := instance.GetPageText(req)
		if nil != err {
			// A single unreadable page aborts the whole parse; no partial
			// result is returned.
			logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
			return
		}
		content.WriteString(" ")
		content.WriteString(normalizeAssetContent(pt.Text))
	}

	ret = &AssetParseResult{
		Content: content.String(),
	}
	return
}

View file

@ -0,0 +1,29 @@
// SiYuan - Refactor your thinking
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package model
import (
"testing"
)
// TestPDFParser runs PdfAssetParser.Parse on the sample document under
// ../testdata and fails unless a result with non-empty content comes back.
func TestPDFParser(t *testing.T) {
	pdfParser := new(PdfAssetParser)
	parsed := pdfParser.Parse("../testdata/parsertest.pdf")
	if parsed != nil && parsed.Content != "" {
		return
	}
	t.Fatalf("empty or nil PDF content result")
}

BIN
kernel/testdata/parsertest.pdf vendored Normal file

Binary file not shown.