This commit is contained in:
Daniel 2025-08-04 18:11:33 +08:00
parent 3caeeaf29d
commit bf465e5bbf
No known key found for this signature in database
GPG key ID: 86211BA83DF03017
9 changed files with 16 additions and 324 deletions

View file

@ -1,287 +0,0 @@
// SiYuan - Refactor your thinking
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package filesys
import (
"bytes"
"strings"
"github.com/88250/gulu"
"github.com/88250/lute/ast"
"github.com/88250/lute/editor"
"github.com/88250/lute/parse"
"github.com/siyuan-note/siyuan/kernel/treenode"
"github.com/siyuan-note/siyuan/kernel/util"
)
// ParseJSONWithoutFix decodes the JSON form of a document into a parse tree
// without applying any legacy-data corrections.
//
// jsonData is the serialized root node; options is stored as the parse option
// of the resulting tree's context. On a decoding failure the returned tree is
// nil and err carries the decoder error.
func ParseJSONWithoutFix(jsonData []byte, options *parse.Options) (ret *parse.Tree, err error) {
	root := &ast.Node{}
	if err = unmarshalJSON(jsonData, root); err != nil {
		return nil, err
	}

	// Build a fresh document node that carries over the decoded root's
	// identity and properties.
	docRoot := &ast.Node{Type: ast.NodeDocument, ID: root.ID, Spec: root.Spec}
	docRoot.KramdownIAL = parse.Map2IAL(root.Properties)
	docRoot.SetIALAttr("type", "doc")

	ret = &parse.Tree{
		Name:    "",
		ID:      root.ID,
		Root:    docRoot,
		Context: &parse.Context{ParseOption: options},
	}
	ret.Context.Tip = docRoot

	// The fix-related pointers are nil on purpose: with ignoreFix set to true
	// genTreeByJSON never dereferences them. Ranging over a nil Children slice
	// is a no-op, so a document without children simply yields an empty tree.
	seenIDs := map[string]bool{}
	for _, child := range root.Children {
		genTreeByJSON(child, ret, &seenIDs, nil, nil, true)
	}
	return ret, nil
}
// ParseJSON decodes the JSON form of a document into a parse tree and applies
// corrections for legacy or malformed data along the way.
//
// needFix reports whether any correction changed the data relative to the
// input. On a decoding failure the returned tree is nil, needFix is false and
// err carries the decoder error.
func ParseJSON(jsonData []byte, options *parse.Options) (ret *parse.Tree, needFix bool, err error) {
	root := &ast.Node{}
	if err = unmarshalJSON(jsonData, root); err != nil {
		return nil, false, err
	}

	docRoot := &ast.Node{Type: ast.NodeDocument, ID: root.ID, Spec: root.Spec}
	ret = &parse.Tree{
		Name:    "",
		ID:      root.ID,
		Root:    docRoot,
		Context: &parse.Context{ParseOption: options},
	}

	// XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034
	if icon := root.Properties["icon"]; icon != "" {
		if filtered := util.FilterUploadEmojiFileName(icon); filtered != icon {
			root.Properties["icon"] = filtered
			needFix = true
		}
	}

	docRoot.KramdownIAL = parse.Map2IAL(root.Properties)
	docRoot.SetIALAttr("type", "doc")

	// Raw newlines inside attribute values are replaced by the editor's
	// escaped-newline token.
	for _, attr := range docRoot.KramdownIAL {
		name, value := attr[0], attr[1]
		if !strings.Contains(value, "\n") {
			continue
		}
		docRoot.SetIALAttr(name, strings.ReplaceAll(value, "\n", editor.IALValEscNewLine))
		needFix = true
	}
	ret.Context.Tip = docRoot

	if root.Children == nil {
		// A document without any children gets a single empty paragraph.
		emptyPara := &ast.Node{Type: ast.NodeParagraph, ID: ast.NewNodeID()}
		emptyPara.SetIALAttr("id", emptyPara.ID)
		emptyPara.SetIALAttr("updated", emptyPara.ID[:14])
		docRoot.AppendChild(emptyPara)
		return ret, true, nil
	}

	var migrate2Spec1 bool
	seenIDs := map[string]bool{}
	for _, child := range root.Children {
		genTreeByJSON(child, ret, &seenIDs, &needFix, &migrate2Spec1, false)
	}

	if docRoot.FirstChild == nil {
		// If the document ended up empty, attach an empty paragraph to it.
		placeholder := treenode.NewParagraph("")
		docRoot.AppendChild(placeholder)
		docRoot.SetIALAttr("updated", placeholder.ID[:14])
	}

	if migrate2Spec1 {
		parse.NestedInlines2FlattedSpans(ret, false)
		needFix = true
	}
	return ret, needFix, nil
}
// genTreeByJSON converts a JSON-decoded node (and, recursively, its Children
// slice) into linked tree form and appends it under tree.Context.Tip.
//
// The node's Data/TypeStr/Properties fields are converted into
// Tokens/Type/KramdownIAL and then cleared. When ignoreFix is false, legacy
// data is corrected first: nodes of unknown type become paragraphs, containers
// with too few children are dropped, and fixLegacyData is applied. Every
// correction sets *needFix. When ignoreFix is true, needFix and
// needMigrate2Spec1 are never dereferenced and may be nil.
func genTreeByJSON(node *ast.Node, tree *parse.Tree, idMap *map[string]bool, needFix, needMigrate2Spec1 *bool, ignoreFix bool) {
	node.Tokens = gulu.Str.ToBytes(node.Data)
	node.Type = ast.Str2NodeType(node.TypeStr)
	node.Data, node.TypeStr = "", ""
	node.KramdownIAL = parse.Map2IAL(node.Properties)
	node.Properties = nil

	if !ignoreFix {
		// Historical data correction.
		if node.Type == -1 {
			// Unknown node type: turn it into a paragraph holding its raw
			// tokens as text and discard its decoded children.
			*needFix = true
			node.Type = ast.NodeParagraph
			node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: node.Tokens})
			node.Children = nil
		}

		// Decide whether this container is considered empty and must be
		// dropped. The count is taken after the unknown-type correction above.
		childCount := len(node.Children)
		dropEmpty := false
		switch node.Type {
		case ast.NodeList, ast.NodeListItem, ast.NodeMathBlock, ast.NodeBlockQueryEmbed:
			// Empty list, list item, math block or query embed block.
			dropEmpty = childCount < 1
		case ast.NodeBlockquote:
			// Empty blockquote.
			dropEmpty = childCount < 2
		case ast.NodeSuperBlock:
			// Empty super block.
			dropEmpty = childCount < 4
		case ast.NodeCodeBlock:
			// https://ld246.com/article/1713689223067
			// A short code block is kept only if it still has a code child.
			// Children have not been converted yet, so their TypeStr is
			// compared rather than their Type.
			if childCount < 4 {
				dropEmpty = true
				for _, child := range node.Children {
					if child.TypeStr == ast.NodeCodeBlockCode.String() {
						dropEmpty = false
						break
					}
				}
			}
		}
		if dropEmpty {
			*needFix = true
			return
		}

		fixLegacyData(tree.Context.Tip, node, idMap, needFix, needMigrate2Spec1)
	}

	// Attach the node and descend into it; the tip is restored to the parent
	// when this call returns.
	tree.Context.Tip.AppendChild(node)
	tree.Context.Tip = node
	defer tree.Context.ParentTip()

	for _, child := range node.Children {
		genTreeByJSON(child, tree, idMap, needFix, needMigrate2Spec1, ignoreFix)
	}
	// The decoded slice is no longer needed once the children are linked in.
	node.Children = nil
}
// fixLegacyData repairs known defects of legacy document data on node in
// place. It is called by genTreeByJSON right before node is appended to tip.
//
// Parameters:
//   - tip: the node that node is about to be appended to; its last child is
//     inspected (and sometimes modified) for the tag-marker corrections.
//   - node: the node being corrected. Its Children slice still holds
//     unconverted JSON children, which is why their TypeStr/Data fields are
//     consulted rather than Type/Tokens.
//   - idMap: set of node IDs seen so far; a node with a duplicate ID gets a
//     new one.
//   - needFix: set to true whenever a correction changed the data.
//   - needMigrate2Spec1: set to true when a block reference node is found.
func fixLegacyData(tip, node *ast.Node, idMap *map[string]bool, needFix, needMigrate2Spec1 *bool) {
	if node.IsBlock() {
		if "" == node.ID {
			node.ID = ast.NewNodeID()
			node.SetIALAttr("id", node.ID)
			*needFix = true
		}
		if node.ID != node.IALAttr("id") {
			// In some cases the ID and the "id" attribute end up different https://ld246.com/article/1722826829447
			node.SetIALAttr("id", node.ID)
			*needFix = true
		}

		if 0 < len(node.Children) && ast.NodeBr.String() == node.Children[len(node.Children)-1].TypeStr {
			// Remove the redundant soft line break at the end of the block https://github.com/siyuan-note/siyuan/issues/6191
			node.Children = node.Children[:len(node.Children)-1]
			*needFix = true
		}
	}

	if "" != node.ID {
		// Duplicate IDs are replaced by a freshly generated one.
		if _, ok := (*idMap)[node.ID]; ok {
			node.ID = ast.NewNodeID()
			node.SetIALAttr("id", node.ID)
			*needFix = true
		}
		(*idMap)[node.ID] = true
	}

	switch node.Type {
	case ast.NodeIFrame:
		if bytes.Contains(node.Tokens, gulu.Str.ToBytes("iframe-content")) {
			// Keep only the <iframe ...>...</iframe> element. bytes.Index
			// returns -1 when a marker is missing, and slicing with such an
			// index would panic, so the tokens are only rewritten when the
			// opening tag exists and a closing tag follows it.
			closeTag := gulu.Str.ToBytes("</iframe>")
			if start := bytes.Index(node.Tokens, gulu.Str.ToBytes("<iframe")); 0 <= start {
				if end := bytes.Index(node.Tokens[start:], closeTag); 0 <= end {
					node.Tokens = node.Tokens[start : start+end+len(closeTag)]
					*needFix = true
				}
			}
		}
	case ast.NodeWidget:
		// Strip the hard-coded local kernel address from widget tokens.
		if bytes.Contains(node.Tokens, gulu.Str.ToBytes("http://127.0.0.1:6806")) {
			node.Tokens = bytes.ReplaceAll(node.Tokens, []byte("http://127.0.0.1:6806"), nil)
			*needFix = true
		}
	case ast.NodeList:
		// A list whose first child has list type 3 must itself have type 3.
		if nil != node.ListData && 3 != node.ListData.Typ && 0 < len(node.Children) &&
			nil != node.Children[0].ListData && 3 == node.Children[0].ListData.Typ {
			node.ListData.Typ = 3
			*needFix = true
		}
	case ast.NodeMark:
		// Trim surrounding whitespace from the text between the mark markers.
		if 3 == len(node.Children) && "NodeText" == node.Children[1].TypeStr {
			if strings.HasPrefix(node.Children[1].Data, " ") || strings.HasSuffix(node.Children[1].Data, " ") {
				node.Children[1].Data = strings.TrimSpace(node.Children[1].Data)
				*needFix = true
			}
		}
	case ast.NodeHeading:
		// Clamp heading levels above 6.
		if 6 < node.HeadingLevel {
			node.HeadingLevel = 6
			*needFix = true
		}
	case ast.NodeLinkDest:
		// Remove stray whitespace around asset link destinations.
		if bytes.HasPrefix(node.Tokens, []byte("assets/")) && bytes.HasSuffix(node.Tokens, []byte(" ")) {
			node.Tokens = bytes.TrimSpace(node.Tokens)
			*needFix = true
		}
	case ast.NodeText:
		// Empty text right after a tag open marker gets a placeholder.
		if nil != tip.LastChild && ast.NodeTagOpenMarker == tip.LastChild.Type && 1 > len(node.Tokens) {
			node.Tokens = []byte("Untitled")
			*needFix = true
		}
	case ast.NodeTagCloseMarker:
		// Make sure a tag never closes without any text content.
		if nil != tip.LastChild {
			if ast.NodeTagOpenMarker == tip.LastChild.Type {
				tip.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: []byte("Untitled")})
				*needFix = true
			} else if "" == tip.LastChild.Text() {
				tip.LastChild.Type = ast.NodeText
				tip.LastChild.Tokens = []byte("Untitled")
				*needFix = true
			}
		}
	case ast.NodeBlockRef:
		// Block references from versions before v2.2.0 cannot be parsed when building the index https://github.com/siyuan-note/siyuan/issues/6889
		// The earlier migration program was defective and missed block reference nodes, so flag that a migration is needed when one is detected here.
		*needMigrate2Spec1 = true
	case ast.NodeInlineHTML:
		// Legacy inline HTML nodes are converted to HTML blocks.
		*needFix = true
		node.Type = ast.NodeHTMLBlock
	}

	// Raw newlines inside attribute values are replaced by the editor's
	// escaped-newline token.
	for _, kv := range node.KramdownIAL {
		if strings.Contains(kv[1], "\n") {
			val := kv[1]
			val = strings.ReplaceAll(val, "\n", editor.IALValEscNewLine)
			node.SetIALAttr(kv[0], val)
			*needFix = true
		}
	}
}

View file

@ -1,25 +0,0 @@
// SiYuan - Refactor your thinking
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package filesys
import (
"github.com/goccy/go-json"
)
// unmarshalJSON decodes the JSON document in data into v using the json
// package imported by this file, and returns the decoder's error unchanged.
func unmarshalJSON(data []byte, v interface{}) error {
	if err := json.Unmarshal(data, v); err != nil {
		return err
	}
	return nil
}

View file

@ -32,6 +32,7 @@ import (
"github.com/88250/lute/render"
jsoniter "github.com/json-iterator/go"
"github.com/panjf2000/ants/v2"
"github.com/siyuan-note/dataparser"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/cache"
@ -268,7 +269,7 @@ func afterWriteTree(tree *parse.Tree) {
func parseJSON2Tree(boxID, p string, jsonData []byte, luteEngine *lute.Lute) (ret *parse.Tree) {
var err error
var needFix bool
ret, needFix, err = ParseJSON(jsonData, luteEngine.ParseOptions)
ret, needFix, err = dataparser.ParseJSON(jsonData, luteEngine.ParseOptions)
if err != nil {
logging.LogErrorf("parse json [%s] to tree failed: %s", boxID+p, err)
return

View file

@ -1,8 +1,6 @@
module github.com/siyuan-note/siyuan/kernel
go 1.24
toolchain go1.24.1
go 1.24.4
require (
code.sajari.com/docconv v1.3.8
@ -37,7 +35,6 @@ require (
github.com/gin-contrib/sse v1.1.0
github.com/gin-gonic/gin v1.10.1
github.com/go-ole/go-ole v1.3.0
github.com/goccy/go-json v0.10.5
github.com/gofrs/flock v0.12.1
github.com/golang-jwt/jwt/v5 v5.2.2
github.com/gorilla/css v1.0.1
@ -59,6 +56,7 @@ require (
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/sashabaranov/go-openai v1.40.3
github.com/shirou/gopsutil/v4 v4.25.7
github.com/siyuan-note/dataparser v0.0.0-20250804100744-b41253b236f3
github.com/siyuan-note/dejavu v0.0.0-20250802020836-44a543706899
github.com/siyuan-note/encryption v0.0.0-20250326023622-24a67e6956ec
github.com/siyuan-note/eventbus v0.0.0-20240627125516-396fdb0f0f97
@ -128,6 +126,7 @@ require (
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.26.0 // indirect
github.com/go-resty/resty/v2 v2.16.5 // indirect
github.com/goccy/go-json v0.10.5 // indirect
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a // indirect
github.com/google/uuid v1.6.0 // indirect

View file

@ -375,6 +375,8 @@ github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+D
github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d h1:lvCTyBbr36+tqMccdGMwuEU+hjux/zL6xSmf5S9ITaA=
github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d/go.mod h1:05UtEgK5zq39gLST6uB0cf3NEHjETfB4Fgr3Gx5R9Vw=
github.com/simplereach/timeutils v1.2.0/go.mod h1:VVbQDfN/FHRZa1LSqcwo4kNZ62OOyqLLGQKYB3pB0Q8=
github.com/siyuan-note/dataparser v0.0.0-20250804100744-b41253b236f3 h1:EH063L0HD1f82DvddurUmEXS0obXypv8pQrcaC/zNgI=
github.com/siyuan-note/dataparser v0.0.0-20250804100744-b41253b236f3/go.mod h1:8lb+SsWAPQblGbjmwEBsBdJszMCcLeECtB95fv6mReg=
github.com/siyuan-note/dejavu v0.0.0-20250802020836-44a543706899 h1:T5ZVZCD1ShUUnx9riyOHrqG/b2dfdS6UJkGgGSjDIoM=
github.com/siyuan-note/dejavu v0.0.0-20250802020836-44a543706899/go.mod h1:Akc3+4tevjI8/HmBP0ZhpvmR8z6MOfRzKdn6v38htp8=
github.com/siyuan-note/encryption v0.0.0-20250326023622-24a67e6956ec h1:D8Sjwa+7WxP3XrIBscT4PxBZZddZ83/O+5nX1sq6g6g=

View file

@ -35,12 +35,12 @@ import (
"github.com/88250/lute/ast"
"github.com/88250/lute/parse"
"github.com/88250/lute/render"
"github.com/siyuan-note/dataparser"
"github.com/siyuan-note/eventbus"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/cache"
"github.com/siyuan-note/siyuan/kernel/conf"
"github.com/siyuan-note/siyuan/kernel/filesys"
"github.com/siyuan-note/siyuan/kernel/search"
"github.com/siyuan-note/siyuan/kernel/sql"
"github.com/siyuan-note/siyuan/kernel/task"
@ -169,7 +169,7 @@ func GetDocHistoryContent(historyPath, keyword string, highlight bool) (id, root
isLargeDoc = 1024*1024*1 <= len(data)
luteEngine := NewLute()
historyTree, err := filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
historyTree, err := dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
if err != nil {
logging.LogErrorf("parse tree from file [%s] failed: %s", historyPath, err)
return

View file

@ -44,6 +44,7 @@ import (
"github.com/88250/lute/parse"
"github.com/88250/lute/render"
util2 "github.com/88250/lute/util"
"github.com/siyuan-note/dataparser"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/riff"
@ -163,7 +164,7 @@ func ImportSY(zipPath, boxID, toPath string) (err error) {
err = readErr
return
}
tree, _, parseErr := filesys.ParseJSON(data, luteEngine.ParseOptions)
tree, _, parseErr := dataparser.ParseJSON(data, luteEngine.ParseOptions)
if nil != parseErr {
logging.LogErrorf("parse .sy [%s] failed: %s", syPath, parseErr)
err = parseErr

View file

@ -45,6 +45,7 @@ import (
"github.com/88250/lute/parse"
"github.com/88250/lute/render"
"github.com/emirpasic/gods/sets/hashset"
"github.com/siyuan-note/dataparser"
"github.com/siyuan-note/dejavu"
"github.com/siyuan-note/dejavu/cloud"
"github.com/siyuan-note/dejavu/entity"
@ -53,7 +54,6 @@ import (
"github.com/siyuan-note/httpclient"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/conf"
"github.com/siyuan-note/siyuan/kernel/filesys"
"github.com/siyuan-note/siyuan/kernel/task"
"github.com/siyuan-note/siyuan/kernel/treenode"
"github.com/siyuan-note/siyuan/kernel/util"
@ -425,7 +425,7 @@ func parseTitleInSnapshot(fileID string, repo *dejavu.Repo, luteEngine *lute.Lut
}
var tree *parse.Tree
tree, err = filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
tree, err = dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
if err != nil {
logging.LogErrorf("parse file [%s] failed: %s", fileID, err)
return
@ -438,7 +438,7 @@ func parseTitleInSnapshot(fileID string, repo *dejavu.Repo, luteEngine *lute.Lut
func parseTreeInSnapshot(data []byte, luteEngine *lute.Lute) (isLargeDoc bool, tree *parse.Tree, err error) {
isLargeDoc = 1024*1024*1 <= len(data)
tree, err = filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
tree, err = dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
if err != nil {
return
}

View file

@ -28,6 +28,7 @@ import (
"github.com/88250/lute"
"github.com/88250/lute/ast"
"github.com/88250/lute/parse"
"github.com/siyuan-note/dataparser"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/av"
@ -162,7 +163,7 @@ func loadTree(localPath string, luteEngine *lute.Lute) (ret *parse.Tree, err err
return
}
ret, err = filesys.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
ret, err = dataparser.ParseJSONWithoutFix(data, luteEngine.ParseOptions)
if err != nil {
logging.LogErrorf("parse json to tree [%s] failed: %s", localPath, err)
return