mirror of
https://github.com/siyuan-note/siyuan.git
synced 2025-12-24 10:30:13 +01:00
Merge remote-tracking branch 'origin/dev' into dev
This commit is contained in:
commit
e87b831da7
7 changed files with 191 additions and 16 deletions
|
|
@ -6,7 +6,7 @@
|
|||
"icon": "1f451",
|
||||
"id": "20230805222417-2lj3dvk",
|
||||
"title": "会员特权",
|
||||
"updated": "20230805225639"
|
||||
"updated": "20230817114518"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
"ListData": {},
|
||||
"Properties": {
|
||||
"id": "20230805225040-mq394af",
|
||||
"updated": "20230805225232"
|
||||
"updated": "20230817114518"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -41,7 +41,7 @@
|
|||
},
|
||||
"Properties": {
|
||||
"id": "20230805225041-vbfv9l9",
|
||||
"updated": "20230805225232"
|
||||
"updated": "20230817114518"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -49,7 +49,7 @@
|
|||
"Type": "NodeParagraph",
|
||||
"Properties": {
|
||||
"id": "20230805225041-vcse7tk",
|
||||
"updated": "20230805225232"
|
||||
"updated": "20230817114518"
|
||||
},
|
||||
"Children": [
|
||||
{
|
||||
|
|
@ -59,7 +59,7 @@
|
|||
{
|
||||
"Type": "NodeTextMark",
|
||||
"TextMarkType": "block-ref",
|
||||
"TextMarkBlockRefID": "20210808180320-m0ztypq",
|
||||
"TextMarkBlockRefID": "20230808120348-orm8sjf",
|
||||
"TextMarkBlockRefSubtype": "s",
|
||||
"TextMarkTextContent": "云端服务"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -173,6 +173,7 @@ export const addGA = () => {
|
|||
subscriptionStatus: -1,
|
||||
subscriptionPlan: -1,
|
||||
subscriptionType: -1,
|
||||
oneTimePayStatus: -1,
|
||||
syncEnabled: false,
|
||||
syncProvider: -1,
|
||||
cTreeCount: window.siyuan.config.stat.cTreeCount,
|
||||
|
|
@ -185,6 +186,7 @@ export const addGA = () => {
|
|||
para.subscriptionStatus = window.siyuan.user.userSiYuanSubscriptionStatus;
|
||||
para.subscriptionPlan = window.siyuan.user.userSiYuanSubscriptionPlan;
|
||||
para.subscriptionType = window.siyuan.user.userSiYuanSubscriptionType;
|
||||
para.oneTimePayStatus = window.siyuan.user.userSiYuanOneTimePayStatus;
|
||||
}
|
||||
if (window.siyuan.config.sync) {
|
||||
para.syncEnabled = window.siyuan.config.sync.enabled;
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ require (
|
|||
github.com/imroc/req/v3 v3.41.4
|
||||
github.com/jinzhu/copier v0.3.5
|
||||
github.com/json-iterator/go v1.1.12
|
||||
github.com/klippa-app/go-pdfium v1.6.0
|
||||
github.com/mattn/go-sqlite3 v2.0.3+incompatible
|
||||
github.com/mitchellh/go-ps v1.0.0
|
||||
github.com/mssola/useragent v1.0.0
|
||||
|
|
@ -108,6 +109,7 @@ require (
|
|||
github.com/imdario/mergo v0.3.16 // indirect
|
||||
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect
|
||||
github.com/jmespath/go-jmespath v0.4.0 // indirect
|
||||
github.com/jolestar/go-commons-pool/v2 v2.1.2 // indirect
|
||||
github.com/juju/errors v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.16.7 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
|
||||
|
|
@ -141,6 +143,7 @@ require (
|
|||
github.com/shopspring/decimal v1.3.1 // indirect
|
||||
github.com/spf13/cast v1.5.1 // indirect
|
||||
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
|
||||
github.com/tetratelabs/wazero v1.3.1 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.11 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
|
|
|
|||
|
|
@ -173,6 +173,8 @@ github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=
|
|||
github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=
|
||||
github.com/flopp/go-findfont v0.1.0 h1:lPn0BymDUtJo+ZkV01VS3661HL6F4qFlkhcJN55u6mU=
|
||||
github.com/flopp/go-findfont v0.1.0/go.mod h1:wKKxRDjD024Rh7VMwoU90i6ikQRCr+JTHB5n4Ejkqvw=
|
||||
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
|
||||
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
|
||||
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
|
||||
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
|
||||
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
|
||||
|
|
@ -354,6 +356,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y
|
|||
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
|
||||
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
|
||||
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
|
||||
github.com/jolestar/go-commons-pool/v2 v2.1.2 h1:E+XGo58F23t7HtZiC/W6jzO2Ux2IccSH/yx4nD+J1CM=
|
||||
github.com/jolestar/go-commons-pool/v2 v2.1.2/go.mod h1:r4NYccrkS5UqP1YQI1COyTZ9UjPJAAGTUxzcsK1kqhY=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
|
|
@ -373,6 +377,8 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo
|
|||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
github.com/klippa-app/go-pdfium v1.6.0 h1:swz+bKYsrRSuPrczot2cE/FoR/1h13R8CjBOv2RcDm4=
|
||||
github.com/klippa-app/go-pdfium v1.6.0/go.mod h1:Lh8U8bQ+Idxz3e89+0u59j64YTPaO3G5JbvRImVqIio=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
|
|
@ -423,7 +429,7 @@ github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn
|
|||
github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA=
|
||||
github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU=
|
||||
github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM=
|
||||
github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc=
|
||||
github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI=
|
||||
github.com/open-spaced-repetition/go-fsrs v1.0.0 h1:FIH5MW29EEHTTLwXPeDWJqO4QA/b7L9i2GEH4lAEl0U=
|
||||
github.com/open-spaced-repetition/go-fsrs v1.0.0/go.mod h1:WpbNs4TTKZChOHFO+ME0B9femUVZsepFT5mhAioszRg=
|
||||
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
|
||||
|
|
@ -538,6 +544,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
|
|||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/studio-b12/gowebdav v0.9.0 h1:1j1sc9gQnNxbXXM4M/CebPOX4aXYtr7MojAVcN4dHjU=
|
||||
github.com/studio-b12/gowebdav v0.9.0/go.mod h1:bHA7t77X/QFExdeAnDzK6vKM34kEZAcE1OX4MfiwjkE=
|
||||
github.com/tetratelabs/wazero v1.3.1 h1:rnb9FgOEQRLLR8tgoD1mfjNjMhFeWRUk+a4b4j/GpUM=
|
||||
github.com/tetratelabs/wazero v1.3.1/go.mod h1:wYx2gNRg8/WihJfSDxA1TIL8H+GkfLYm+bIfbblu9VQ=
|
||||
github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM=
|
||||
github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
|
||||
github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=
|
||||
|
|
|
|||
|
|
@ -24,11 +24,14 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"code.sajari.com/docconv"
|
||||
"github.com/88250/gulu"
|
||||
"github.com/88250/lute/ast"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/klippa-app/go-pdfium/requests"
|
||||
"github.com/klippa-app/go-pdfium/webassembly"
|
||||
"github.com/siyuan-note/eventbus"
|
||||
"github.com/siyuan-note/filelock"
|
||||
"github.com/siyuan-note/logging"
|
||||
|
|
@ -287,8 +290,8 @@ func IndexAssetContent(absPath string) {
|
|||
assetsDir := util.GetDataAssetsAbsPath()
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(absPath))
|
||||
parser, found := assetContentSearcher.Parsers[ext]
|
||||
if !found {
|
||||
parser := assetContentSearcher.GetParser(ext)
|
||||
if nil == parser {
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -349,9 +352,15 @@ var (
|
|||
)
|
||||
|
||||
type AssetsSearcher struct {
|
||||
Parsers map[string]AssetParser
|
||||
parsers map[string]AssetParser
|
||||
lock *sync.Mutex
|
||||
}
|
||||
|
||||
lock *sync.Mutex
|
||||
func (searcher *AssetsSearcher) GetParser(ext string) AssetParser {
|
||||
searcher.lock.Lock()
|
||||
defer searcher.lock.Unlock()
|
||||
|
||||
return searcher.parsers[ext]
|
||||
}
|
||||
|
||||
func (searcher *AssetsSearcher) FullIndex() {
|
||||
|
|
@ -374,8 +383,8 @@ func (searcher *AssetsSearcher) FullIndex() {
|
|||
}
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(absPath))
|
||||
parser, found := searcher.Parsers[ext]
|
||||
if !found {
|
||||
parser := searcher.GetParser(ext)
|
||||
if nil == parser {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -408,14 +417,43 @@ func (searcher *AssetsSearcher) FullIndex() {
|
|||
}
|
||||
|
||||
func NewAssetsSearcher() *AssetsSearcher {
|
||||
txtAssetParser := &TxtAssetParser{}
|
||||
return &AssetsSearcher{
|
||||
Parsers: map[string]AssetParser{
|
||||
".txt": &TxtAssetParser{},
|
||||
".md": &TxtAssetParser{},
|
||||
".markdown": &TxtAssetParser{},
|
||||
parsers: map[string]AssetParser{
|
||||
".txt": txtAssetParser,
|
||||
".md": txtAssetParser,
|
||||
".markdown": txtAssetParser,
|
||||
".json": txtAssetParser,
|
||||
".log": txtAssetParser,
|
||||
".sql": txtAssetParser,
|
||||
".html": txtAssetParser,
|
||||
".xml": txtAssetParser,
|
||||
".java": txtAssetParser,
|
||||
".h": txtAssetParser,
|
||||
".c": txtAssetParser,
|
||||
".cpp": txtAssetParser,
|
||||
".go": txtAssetParser,
|
||||
".swift": txtAssetParser,
|
||||
".kt": txtAssetParser,
|
||||
".py": txtAssetParser,
|
||||
".js": txtAssetParser,
|
||||
".css": txtAssetParser,
|
||||
".ts": txtAssetParser,
|
||||
".sh": txtAssetParser,
|
||||
".bat": txtAssetParser,
|
||||
".cmd": txtAssetParser,
|
||||
".ini": txtAssetParser,
|
||||
".yaml": txtAssetParser,
|
||||
".rst": txtAssetParser,
|
||||
".adoc": txtAssetParser,
|
||||
".textile": txtAssetParser,
|
||||
".opml": txtAssetParser,
|
||||
".org": txtAssetParser,
|
||||
".wiki": txtAssetParser,
|
||||
".docx": &DocxAssetParser{},
|
||||
".pptx": &PptxAssetParser{},
|
||||
".xlsx": &XlsxAssetParser{},
|
||||
".pdf": &PdfAssetParser{},
|
||||
},
|
||||
|
||||
lock: &sync.Mutex{},
|
||||
|
|
@ -604,3 +642,98 @@ func (parser *XlsxAssetParser) Parse(absPath string) (ret *AssetParseResult) {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// PdfAssetParser is the AssetParser implementation that extracts text content from PDF assets.
|
||||
type PdfAssetParser struct {
|
||||
}
|
||||
|
||||
// Parse will parse a PDF document using PDFium webassembly module
|
||||
func (parser *PdfAssetParser) Parse(absPath string) (ret *AssetParseResult) {
|
||||
if !strings.HasSuffix(strings.ToLower(absPath), ".pdf") {
|
||||
return
|
||||
}
|
||||
|
||||
if !gulu.File.IsExist(absPath) {
|
||||
return
|
||||
}
|
||||
|
||||
tmp := copyTempAsset(absPath)
|
||||
if "" == tmp {
|
||||
return
|
||||
}
|
||||
defer os.RemoveAll(tmp)
|
||||
|
||||
f, err := os.Open(tmp)
|
||||
if nil != err {
|
||||
logging.LogErrorf("open [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
stat, err := f.Stat()
|
||||
if nil != err {
|
||||
logging.LogErrorf("open [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
|
||||
// initialize pdfium with one worker
|
||||
pool, err := webassembly.Init(webassembly.Config{
|
||||
MinIdle: 1,
|
||||
MaxIdle: 1,
|
||||
MaxTotal: 1,
|
||||
})
|
||||
if err != nil {
|
||||
logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
defer pool.Close()
|
||||
|
||||
instance, err := pool.GetInstance(time.Second * 30)
|
||||
if err != nil {
|
||||
logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
defer instance.Close()
|
||||
|
||||
// get number of pages inside PDF document
|
||||
doc, err := instance.OpenDocument(&requests.OpenDocument{
|
||||
FileReader: f,
|
||||
FileReaderSize: stat.Size(),
|
||||
})
|
||||
if err != nil {
|
||||
logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
defer instance.FPDF_CloseDocument(&requests.FPDF_CloseDocument{
|
||||
Document: doc.Document,
|
||||
})
|
||||
|
||||
pageCount, err := instance.FPDF_GetPageCount(&requests.FPDF_GetPageCount{Document: doc.Document})
|
||||
if err != nil {
|
||||
logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
// loop through pages and get content
|
||||
content := ""
|
||||
for page := 0; page < pageCount.PageCount; page++ {
|
||||
req := &requests.GetPageText{
|
||||
Page: requests.Page{
|
||||
ByIndex: &requests.PageByIndex{
|
||||
Document: doc.Document,
|
||||
Index: page,
|
||||
},
|
||||
},
|
||||
}
|
||||
pt, err := instance.GetPageText(req)
|
||||
if err != nil {
|
||||
logging.LogErrorf("convert [%s] failed: [%s]", tmp, err)
|
||||
return
|
||||
}
|
||||
content += " " + normalizeAssetContent(pt.Text)
|
||||
}
|
||||
|
||||
ret = &AssetParseResult{
|
||||
Content: content,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
|||
29
kernel/model/asset_content_test.go
Normal file
29
kernel/model/asset_content_test.go
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
// SiYuan - Refactor your thinking
|
||||
// Copyright (c) 2020-present, b3log.org
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package model
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPDFParser(t *testing.T) {
|
||||
p := &PdfAssetParser{}
|
||||
res := p.Parse("../testdata/parsertest.pdf")
|
||||
if res == nil || res.Content == "" {
|
||||
t.Fatalf("empty or nil PDF content result")
|
||||
}
|
||||
}
|
||||
BIN
kernel/testdata/parsertest.pdf
vendored
Normal file
BIN
kernel/testdata/parsertest.pdf
vendored
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue