From bf465e5bbfeac8d00626845145f4eb77492024c4 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Mon, 4 Aug 2025 18:11:33 +0800 Subject: [PATCH] :recycle: Move .sy data parser to https://github.com/siyuan-note/dataparser --- kernel/filesys/json_parser.go | 287 ------------------------------- kernel/filesys/json_unmarshal.go | 25 --- kernel/filesys/tree.go | 3 +- kernel/go.mod | 7 +- kernel/go.sum | 2 + kernel/model/history.go | 4 +- kernel/model/import.go | 3 +- kernel/model/repository.go | 6 +- kernel/model/tree.go | 3 +- 9 files changed, 16 insertions(+), 324 deletions(-) delete mode 100644 kernel/filesys/json_parser.go delete mode 100644 kernel/filesys/json_unmarshal.go diff --git a/kernel/filesys/json_parser.go b/kernel/filesys/json_parser.go deleted file mode 100644 index 6646cf604..000000000 --- a/kernel/filesys/json_parser.go +++ /dev/null @@ -1,287 +0,0 @@ -// SiYuan - Refactor your thinking -// Copyright (c) 2020-present, b3log.org -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package filesys - -import ( - "bytes" - "strings" - - "github.com/88250/gulu" - "github.com/88250/lute/ast" - "github.com/88250/lute/editor" - "github.com/88250/lute/parse" - "github.com/siyuan-note/siyuan/kernel/treenode" - "github.com/siyuan-note/siyuan/kernel/util" -) - -func ParseJSONWithoutFix(jsonData []byte, options *parse.Options) (ret *parse.Tree, err error) { - root := &ast.Node{} - err = unmarshalJSON(jsonData, root) - if err != nil { - return - } - - ret = &parse.Tree{Name: "", ID: root.ID, Root: &ast.Node{Type: ast.NodeDocument, ID: root.ID, Spec: root.Spec}, Context: &parse.Context{ParseOption: options}} - ret.Root.KramdownIAL = parse.Map2IAL(root.Properties) - ret.Root.SetIALAttr("type", "doc") - ret.Context.Tip = ret.Root - if nil == root.Children { - return - } - - idMap := map[string]bool{} - for _, child := range root.Children { - genTreeByJSON(child, ret, &idMap, nil, nil, true) - } - return -} - -func ParseJSON(jsonData []byte, options *parse.Options) (ret *parse.Tree, needFix bool, err error) { - root := &ast.Node{} - err = unmarshalJSON(jsonData, root) - if err != nil { - return - } - - ret = &parse.Tree{Name: "", ID: root.ID, Root: &ast.Node{Type: ast.NodeDocument, ID: root.ID, Spec: root.Spec}, Context: &parse.Context{ParseOption: options}} - if icon := root.Properties["icon"]; "" != icon { - // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034 - if newIcon := util.FilterUploadEmojiFileName(icon); newIcon != icon { - root.Properties["icon"] = newIcon - needFix = true - } - } - - ret.Root.KramdownIAL = parse.Map2IAL(root.Properties) - ret.Root.SetIALAttr("type", "doc") - for _, kv := range ret.Root.KramdownIAL { - if strings.Contains(kv[1], "\n") { - val := kv[1] - val = strings.ReplaceAll(val, "\n", editor.IALValEscNewLine) - ret.Root.SetIALAttr(kv[0], val) - needFix = true - } - } - - ret.Context.Tip = ret.Root - if nil == root.Children { - newPara := &ast.Node{Type: ast.NodeParagraph, ID: ast.NewNodeID()} - newPara.SetIALAttr("id", newPara.ID) - newPara.SetIALAttr("updated", newPara.ID[:14]) - ret.Root.AppendChild(newPara) - needFix = true - return - } - - needMigrate2Spec1 := false - idMap := map[string]bool{} - for _, child := range root.Children { - genTreeByJSON(child, ret, &idMap, &needFix, &needMigrate2Spec1, false) - } - - if nil == ret.Root.FirstChild { - // 如果是空文档的话挂一个空段落上去 - newP := treenode.NewParagraph("") - ret.Root.AppendChild(newP) - ret.Root.SetIALAttr("updated", newP.ID[:14]) - } - - if needMigrate2Spec1 { - parse.NestedInlines2FlattedSpans(ret, false) - needFix = true - } - return -} - -func genTreeByJSON(node *ast.Node, tree *parse.Tree, idMap *map[string]bool, needFix, needMigrate2Spec1 *bool, ignoreFix bool) { - node.Tokens, node.Type = gulu.Str.ToBytes(node.Data), ast.Str2NodeType(node.TypeStr) - node.Data, node.TypeStr = "", "" - node.KramdownIAL = parse.Map2IAL(node.Properties) - node.Properties = nil - - if !ignoreFix { - // 历史数据订正 - - if -1 == node.Type { - *needFix = true - node.Type = ast.NodeParagraph - node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: node.Tokens}) - node.Children = nil - } - - switch node.Type { - case ast.NodeList: - if 1 > len(node.Children) { - *needFix = true - return // 忽略空列表 - } - case ast.NodeListItem: - if 1 > len(node.Children) { - *needFix = true - return // 忽略空列表项 - } - case ast.NodeBlockquote: - if 2 > len(node.Children) { - *needFix = true - return // 忽略空引述 - } - case ast.NodeSuperBlock: - if 4 > len(node.Children) { - *needFix = true - return // 忽略空超级块 - } - case ast.NodeMathBlock: - if 1 > len(node.Children) { - *needFix = true - return // 忽略空公式 - } - case ast.NodeBlockQueryEmbed: - if 1 > len(node.Children) { - *needFix = true - return // 忽略空查询嵌入块 - } - case ast.NodeCodeBlock: - if 4 > len(node.Children) { - // https://ld246.com/article/1713689223067 - existCode := false - for _, child := range node.Children { - if ast.NodeCodeBlockCode.String() == child.TypeStr { - existCode = true - break - } - } - if !existCode { - *needFix = true - return // 忽略空代码块 - } - } - } - - fixLegacyData(tree.Context.Tip, node, idMap, needFix, needMigrate2Spec1) - } - - tree.Context.Tip.AppendChild(node) - tree.Context.Tip = node - defer tree.Context.ParentTip() - if nil == node.Children { - return - } - for _, child := range node.Children { - genTreeByJSON(child, tree, idMap, needFix, needMigrate2Spec1, ignoreFix) - } - node.Children = nil -} - -func fixLegacyData(tip, node *ast.Node, idMap *map[string]bool, needFix, needMigrate2Spec1 *bool) { - if node.IsBlock() { - if "" == node.ID { - node.ID = ast.NewNodeID() - node.SetIALAttr("id", node.ID) - *needFix = true - } - - if node.ID != node.IALAttr("id") { - //某些情况下会导致 ID 和属性 id 不相同 https://ld246.com/article/1722826829447 - node.SetIALAttr("id", node.ID) - *needFix = true - } - - if 0 < len(node.Children) && ast.NodeBr.String() == node.Children[len(node.Children)-1].TypeStr { - // 剔除块尾多余的软换行 https://github.com/siyuan-note/siyuan/issues/6191 - node.Children = node.Children[:len(node.Children)-1] - *needFix = true - } - } - if "" != node.ID { - if _, ok := (*idMap)[node.ID]; ok { - node.ID = ast.NewNodeID() - node.SetIALAttr("id", node.ID) - *needFix = true - } - (*idMap)[node.ID] = true - } - - switch node.Type { - case ast.NodeIFrame: - if bytes.Contains(node.Tokens, gulu.Str.ToBytes("iframe-content")) { - start := bytes.Index(node.Tokens, gulu.Str.ToBytes("")) - node.Tokens = node.Tokens[start : end+9] - *needFix = true - } - case ast.NodeWidget: - if bytes.Contains(node.Tokens, gulu.Str.ToBytes("http://127.0.0.1:6806")) { - node.Tokens = bytes.ReplaceAll(node.Tokens, []byte("http://127.0.0.1:6806"), nil) - *needFix = true - } - case ast.NodeList: - if nil != node.ListData && 3 != node.ListData.Typ && 0 < len(node.Children) && - nil != node.Children[0].ListData && 3 == node.Children[0].ListData.Typ { - node.ListData.Typ = 3 - *needFix = true - } - case ast.NodeMark: - if 3 == len(node.Children) && "NodeText" == node.Children[1].TypeStr { - if strings.HasPrefix(node.Children[1].Data, " ") || strings.HasSuffix(node.Children[1].Data, " ") { - node.Children[1].Data = strings.TrimSpace(node.Children[1].Data) - *needFix = true - } - } - case ast.NodeHeading: - if 6 < node.HeadingLevel { - node.HeadingLevel = 6 - *needFix = true - } - case ast.NodeLinkDest: - if bytes.HasPrefix(node.Tokens, []byte("assets/")) && bytes.HasSuffix(node.Tokens, []byte(" ")) { - node.Tokens = bytes.TrimSpace(node.Tokens) - *needFix = true - } - case ast.NodeText: - if nil != tip.LastChild && ast.NodeTagOpenMarker == tip.LastChild.Type && 1 > len(node.Tokens) { - node.Tokens = []byte("Untitled") - *needFix = true - } - case ast.NodeTagCloseMarker: - if nil != tip.LastChild { - if ast.NodeTagOpenMarker == tip.LastChild.Type { - tip.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: []byte("Untitled")}) - *needFix = true - } else if "" == tip.LastChild.Text() { - tip.LastChild.Type = ast.NodeText - tip.LastChild.Tokens = []byte("Untitled") - *needFix = true - } - } - case ast.NodeBlockRef: - // 建立索引时无法解析 `v2.2.0-` 版本的块引用 https://github.com/siyuan-note/siyuan/issues/6889 - // 早先的迁移程序有缺陷,漏迁移了块引用节点,这里检测到块引用节点后标识需要迁移 - *needMigrate2Spec1 = true - case ast.NodeInlineHTML: - *needFix = true - node.Type = ast.NodeHTMLBlock - } - - for _, kv := range node.KramdownIAL { - if strings.Contains(kv[1], "\n") { - val := kv[1] - val = strings.ReplaceAll(val, "\n", editor.IALValEscNewLine) - node.SetIALAttr(kv[0], val) - *needFix = true - } - } -} diff --git a/kernel/filesys/json_unmarshal.go b/kernel/filesys/json_unmarshal.go deleted file mode 100644 index b01ab495a..000000000 --- a/kernel/filesys/json_unmarshal.go +++ /dev/null @@ -1,25 +0,0 @@ -// SiYuan - Refactor your thinking -// Copyright (c) 2020-present, b3log.org -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package filesys - -import ( - "github.com/goccy/go-json" -) - -func unmarshalJSON(data []byte, v interface{}) error { - return json.Unmarshal(data, v) -} diff --git a/kernel/filesys/tree.go b/kernel/filesys/tree.go index 2ebe84275..a904bde9c 100644 --- a/kernel/filesys/tree.go +++ b/kernel/filesys/tree.go @@ -32,6 +32,7 @@ import ( "github.com/88250/lute/render" jsoniter "github.com/json-iterator/go" "github.com/panjf2000/ants/v2" + "github.com/siyuan-note/dataparser" "github.com/siyuan-note/filelock" "github.com/siyuan-note/logging" "github.com/siyuan-note/siyuan/kernel/cache" @@ -268,7 +269,7 @@ func afterWriteTree(tree *parse.Tree) { func parseJSON2Tree(boxID, p string, jsonData []byte, luteEngine *lute.Lute) (ret *parse.Tree) { var err error var needFix bool - ret, needFix, err = ParseJSON(jsonData, luteEngine.ParseOptions) + ret, needFix, err = dataparser.ParseJSON(jsonData, luteEngine.ParseOptions) if err != nil { logging.LogErrorf("parse json [%s] to tree failed: %s", boxID+p, err) return diff --git a/kernel/go.mod b/kernel/go.mod index 3b244d764..2aaf993e2 100644 --- a/kernel/go.mod +++ b/kernel/go.mod @@ -1,8 +1,6 @@ module github.com/siyuan-note/siyuan/kernel -go 1.24 - -toolchain go1.24.1 +go 1.24.4 require ( code.sajari.com/docconv v1.3.8 @@ -37,7 +35,6 @@ require ( github.com/gin-contrib/sse v1.1.0 github.com/gin-gonic/gin v1.10.1 github.com/go-ole/go-ole v1.3.0 - github.com/goccy/go-json v0.10.5 github.com/gofrs/flock v0.12.1 github.com/golang-jwt/jwt/v5 v5.2.2 github.com/gorilla/css v1.0.1 @@ -59,6 +56,7 @@ require ( github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 github.com/sashabaranov/go-openai v1.40.3 github.com/shirou/gopsutil/v4 v4.25.7 + github.com/siyuan-note/dataparser v0.0.0-20250804100744-b41253b236f3 github.com/siyuan-note/dejavu v0.0.0-20250802020836-44a543706899 github.com/siyuan-note/encryption v0.0.0-20250326023622-24a67e6956ec github.com/siyuan-note/eventbus v0.0.0-20240627125516-396fdb0f0f97 @@ -128,6 +126,7 @@ require ( github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.26.0 // indirect github.com/go-resty/resty/v2 v2.16.5 // indirect + github.com/goccy/go-json v0.10.5 // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a // indirect github.com/google/uuid v1.6.0 // indirect diff --git a/kernel/go.sum b/kernel/go.sum index 121178d50..55c3db3d2 100644 --- a/kernel/go.sum +++ b/kernel/go.sum @@ -375,6 +375,8 @@ github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+D github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d h1:lvCTyBbr36+tqMccdGMwuEU+hjux/zL6xSmf5S9ITaA= github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d/go.mod h1:05UtEgK5zq39gLST6uB0cf3NEHjETfB4Fgr3Gx5R9Vw= github.com/simplereach/timeutils v1.2.0/go.mod h1:VVbQDfN/FHRZa1LSqcwo4kNZ62OOyqLLGQKYB3pB0Q8= +github.com/siyuan-note/dataparser v0.0.0-20250804100744-b41253b236f3 h1:EH063L0HD1f82DvddurUmEXS0obXypv8pQrcaC/zNgI= +github.com/siyuan-note/dataparser v0.0.0-20250804100744-b41253b236f3/go.mod h1:8lb+SsWAPQblGbjmwEBsBdJszMCcLeECtB95fv6mReg= github.com/siyuan-note/dejavu v0.0.0-20250802020836-44a543706899 h1:T5ZVZCD1ShUUnx9riyOHrqG/b2dfdS6UJkGgGSjDIoM= github.com/siyuan-note/dejavu v0.0.0-20250802020836-44a543706899/go.mod h1:Akc3+4tevjI8/HmBP0ZhpvmR8z6MOfRzKdn6v38htp8= github.com/siyuan-note/encryption v0.0.0-20250326023622-24a67e6956ec h1:D8Sjwa+7WxP3XrIBscT4PxBZZddZ83/O+5nX1sq6g6g= diff --git a/kernel/model/history.go b/kernel/model/history.go index b0f8e2f19..2eea8d837 100644 --- a/kernel/model/history.go +++ b/kernel/model/history.go @@ -35,12 +35,12 @@ import ( "github.com/88250/lute/ast" "github.com/88250/lute/parse" "github.com/88250/lute/render" + "github.com/siyuan-note/dataparser" "github.com/siyuan-note/eventbus" "github.com/siyuan-note/filelock" "github.com/siyuan-note/logging" "github.com/siyuan-note/siyuan/kernel/cache" "github.com/siyuan-note/siyuan/kernel/conf" - "github.com/siyuan-note/siyuan/kernel/filesys" "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/task" @@ -169,7 +169,7 @@ func GetDocHistoryContent(historyPath, keyword string, highlight bool) (id, root isLargeDoc = 1024*1024*1 <= len(data) luteEngine := NewLute() - historyTree, err := filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions) + historyTree, err := dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions) if err != nil { logging.LogErrorf("parse tree from file [%s] failed: %s", historyPath, err) return diff --git a/kernel/model/import.go b/kernel/model/import.go index f734fa3a3..51c78876d 100644 --- a/kernel/model/import.go +++ b/kernel/model/import.go @@ -44,6 +44,7 @@ import ( "github.com/88250/lute/parse" "github.com/88250/lute/render" util2 "github.com/88250/lute/util" + "github.com/siyuan-note/dataparser" "github.com/siyuan-note/filelock" "github.com/siyuan-note/logging" "github.com/siyuan-note/riff" @@ -163,7 +164,7 @@ func ImportSY(zipPath, boxID, toPath string) (err error) { err = readErr return } - tree, _, parseErr := filesys.ParseJSON(data, luteEngine.ParseOptions) + tree, _, parseErr := dataparser.ParseJSON(data, luteEngine.ParseOptions) if nil != parseErr { logging.LogErrorf("parse .sy [%s] failed: %s", syPath, parseErr) err = parseErr diff --git a/kernel/model/repository.go b/kernel/model/repository.go index b84b468f5..bbb6b8f7b 100644 --- a/kernel/model/repository.go +++ b/kernel/model/repository.go @@ -45,6 +45,7 @@ import ( "github.com/88250/lute/parse" "github.com/88250/lute/render" "github.com/emirpasic/gods/sets/hashset" + "github.com/siyuan-note/dataparser" "github.com/siyuan-note/dejavu" "github.com/siyuan-note/dejavu/cloud" "github.com/siyuan-note/dejavu/entity" @@ -53,7 +54,6 @@ import ( "github.com/siyuan-note/httpclient" "github.com/siyuan-note/logging" "github.com/siyuan-note/siyuan/kernel/conf" - "github.com/siyuan-note/siyuan/kernel/filesys" "github.com/siyuan-note/siyuan/kernel/task" "github.com/siyuan-note/siyuan/kernel/treenode" "github.com/siyuan-note/siyuan/kernel/util" @@ -425,7 +425,7 @@ func parseTitleInSnapshot(fileID string, repo *dejavu.Repo, luteEngine *lute.Lut } var tree *parse.Tree - tree, err = filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions) + tree, err = dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions) if err != nil { logging.LogErrorf("parse file [%s] failed: %s", fileID, err) return @@ -438,7 +438,7 @@ func parseTitleInSnapshot(fileID string, repo *dejavu.Repo, luteEngine *lute.Lut func parseTreeInSnapshot(data []byte, luteEngine *lute.Lute) (isLargeDoc bool, tree *parse.Tree, err error) { isLargeDoc = 1024*1024*1 <= len(data) - tree, err = filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions) + tree, err = dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions) if err != nil { return } diff --git a/kernel/model/tree.go b/kernel/model/tree.go index cd26fb03b..742538d03 100644 --- a/kernel/model/tree.go +++ b/kernel/model/tree.go @@ -28,6 +28,7 @@ import ( "github.com/88250/lute" "github.com/88250/lute/ast" "github.com/88250/lute/parse" + "github.com/siyuan-note/dataparser" "github.com/siyuan-note/filelock" "github.com/siyuan-note/logging" "github.com/siyuan-note/siyuan/kernel/av" @@ -162,7 +163,7 @@ func loadTree(localPath string, luteEngine *lute.Lute) (ret *parse.Tree, err err return } - ret, err = filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions) + ret, err = dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions) if err != nil { logging.LogErrorf("parse json to tree [%s] failed: %s", localPath, err) return