From f07ee114d1a1a93c0241cff40c45a361246045dc Mon Sep 17 00:00:00 2001 From: Liang Ding Date: Thu, 16 Feb 2023 13:14:15 +0800 Subject: [PATCH] =?UTF-8?q?:zap:=20=E6=94=B9=E8=BF=9B=E6=89=93=E5=BC=80?= =?UTF-8?q?=E8=99=9A=E6=8B=9F=E5=BC=95=E7=94=A8=E5=90=8E=E5=8A=A0=E8=BD=BD?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E7=9A=84=E6=80=A7=E8=83=BD=20https://github.?= =?UTF-8?q?com/siyuan-note/siyuan/issues/7378?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kernel/job/cron.go | 1 + kernel/model/file.go | 18 +--- kernel/model/virutalref.go | 181 +++++++++++++++++++++++++++++++++++-- 3 files changed, 177 insertions(+), 23 deletions(-) diff --git a/kernel/job/cron.go b/kernel/job/cron.go index ffb98438d..1a7b6c892 100644 --- a/kernel/job/cron.go +++ b/kernel/job/cron.go @@ -39,6 +39,7 @@ func StartCron() { go every(util.SQLFlushInterval, sql.FlushTxJob) go every(10*time.Minute, model.FixIndexJob) go every(10*time.Minute, model.IndexEmbedBlockJob) + go every(10*time.Minute, model.CacheVirtualBlockRefJob) go every(12*time.Second, model.OCRAssetsJob) go every(12*time.Second, model.FlushAssetsTextsJob) go every(30*time.Second, model.HookDesktopUIProcJob) diff --git a/kernel/model/file.go b/kernel/model/file.go index bb7521dc2..ece4df246 100644 --- a/kernel/model/file.go +++ b/kernel/model/file.go @@ -440,19 +440,6 @@ func StatTree(id string) (ret *util.BlockStatResult) { } } -const ( - searchMarkDataType = "search-mark" - virtualBlockRefDataType = "virtual-block-ref" -) - -func getMarkSpanStart(dataType string) string { - return fmt.Sprintf("", dataType) -} - -func getMarkSpanEnd() string { - return "" -} - func GetDoc(startID, endID, id string, index int, keyword string, mode int, size int, isBacklink bool) (blockCount, childBlockCount int, dom, parentID, parent2ID, rootID, typ string, eof bool, boxID, docPath string, isBacklinkExpand bool, err error) { //os.MkdirAll("pprof", 0755) //cpuProfile, _ := os.Create("pprof/GetDoc") @@ -618,7 +605,8 @@ func GetDoc(startID, endID, id string, index int, keyword string, mode int, size } refCount := sql.QueryRootChildrenRefCount(rootID) - virtualBlockRefKeywords := getVirtualRefKeywords(tree.Root.IALAttr("title")) + //virtualBlockRefKeywords := getVirtualRefKeywords(tree.Root.IALAttr("title")) + virtualBlockRefKeywords := getBlockVirtualRefKeywords(tree.Root) subTree := &parse.Tree{ID: rootID, Root: &ast.Node{Type: ast.NodeDocument}, Marks: tree.Marks} keyword = strings.Join(strings.Split(keyword, " "), search.TermSep) @@ -659,7 +647,7 @@ func GetDoc(startID, endID, id string, index int, keyword string, mode int, size } } if hitBlock { - if markReplaceSpan(n, &unlinks, keywords, searchMarkDataType, luteEngine) { + if markReplaceSpan(n, &unlinks, keywords, search.MarkDataType, luteEngine) { return ast.WalkContinue } } diff --git a/kernel/model/virutalref.go b/kernel/model/virutalref.go index 6e3a1e2e5..fe9f97262 100644 --- a/kernel/model/virutalref.go +++ b/kernel/model/virutalref.go @@ -17,20 +17,171 @@ package model import ( - "regexp" - "sort" - "strings" - + "bytes" "github.com/88250/gulu" "github.com/88250/lute" "github.com/88250/lute/ast" "github.com/88250/lute/parse" + "github.com/dgraph-io/ristretto" + "github.com/panjf2000/ants/v2" + "github.com/siyuan-note/logging" + "github.com/siyuan-note/siyuan/kernel/filesys" + "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/treenode" + "github.com/siyuan-note/siyuan/kernel/util" + "os" + "path/filepath" + "regexp" + "runtime" + "sort" + "strings" + "sync" ) +// virtualBlockRefCache 用于保存块关联的虚拟引用关键字。 +// 改进打开虚拟引用后加载文档的性能 https://github.com/siyuan-note/siyuan/issues/7378 +var virtualBlockRefCache, _ = ristretto.NewCache(&ristretto.Config{ + NumCounters: 1024000, + MaxCost: 102400, + BufferItems: 64, +}) + +func getBlockVirtualRefKeywords(root *ast.Node) (ret []string) { + val, ok := virtualBlockRefCache.Get(root.ID) + if !ok { + treeTitle := root.IALAttr("title") + buf := bytes.Buffer{} + ast.Walk(root, func(n *ast.Node, entering bool) ast.WalkStatus { + if !entering || !n.IsBlock() { + return ast.WalkContinue + } + + content := treenode.NodeStaticContent(n, nil) + buf.WriteString(content) + return ast.WalkContinue + }) + content := buf.String() + putBlockVirtualRefKeywords(content, root.ID, treeTitle) + val, ok = virtualBlockRefCache.Get(root.ID) + if !ok { + return + } + } + ret = val.([]string) + return +} + +func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) { + keywords := getVirtualRefKeywords(docTitle) + if 1 > len(keywords) { + return + } + + var hitKeywords []string + contentTmp := blockContent + if !Conf.Search.CaseSensitive { + contentTmp = strings.ToLower(blockContent) + } + for _, keyword := range keywords { + keywordTmp := keyword + if !Conf.Search.CaseSensitive { + keywordTmp = strings.ToLower(keyword) + } + + if strings.Contains(contentTmp, keywordTmp) { + hitKeywords = append(hitKeywords, keyword) + } + } + + if 1 > len(hitKeywords) { + return + } + + hitKeywords = gulu.Str.RemoveDuplicatedElem(hitKeywords) + virtualBlockRefCache.Set(blockID, hitKeywords, 1) +} + +func CacheVirtualBlockRefJob() { + virtualBlockRefCache.Del("virtual_ref") + + if !Conf.Editor.VirtualBlockRef { + return + } + + keywords := sql.QueryVirtualRefKeywords(Conf.Search.VirtualRefName, Conf.Search.VirtualRefAlias, Conf.Search.VirtualRefAnchor, Conf.Search.VirtualRefDoc) + virtualBlockRefCache.Set("virtual_ref", keywords, 1) + + boxes := Conf.GetOpenedBoxes() + luteEngine := lute.New() + for _, box := range boxes { + boxPath := filepath.Join(util.DataDir, box.ID) + var paths []string + filepath.Walk(boxPath, func(path string, info os.FileInfo, err error) error { + if boxPath == path { + // 跳过根路径(笔记本文件夹) + return nil + } + + if info.IsDir() { + if strings.HasPrefix(info.Name(), ".") { + return filepath.SkipDir + } + return nil + } + + if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") { + return nil + } + + p := path[len(boxPath):] + p = filepath.ToSlash(p) + paths = append(paths, p) + return nil + }) + + poolSize := runtime.NumCPU() + if 4 < poolSize { + poolSize = 4 + } + i := 0 + waitGroup := &sync.WaitGroup{} + pool, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) { + defer waitGroup.Done() + + p := arg.(string) + tree, loadErr := filesys.LoadTree(box.ID, p, luteEngine) + if nil != loadErr { + return + } + + treeTitle := tree.Root.IALAttr("title") + buf := bytes.Buffer{} + ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { + if !entering || !n.IsBlock() { + return ast.WalkContinue + } + + content := treenode.NodeStaticContent(n, nil) + buf.WriteString(content) + return ast.WalkContinue + }) + content := buf.String() + putBlockVirtualRefKeywords(content, tree.ID, treeTitle) + i++ + logging.LogInfof("cached virtual block ref for tree [%s, %d/%d]", tree.ID, i, len(paths)) + }) + for _, p := range paths { + waitGroup.Add(1) + pool.Invoke(p) + } + waitGroup.Wait() + pool.Release() + } +} + func processVirtualRef(n *ast.Node, unlinks *[]*ast.Node, virtualBlockRefKeywords []string, refCount map[string]int, luteEngine *lute.Lute) bool { - if !Conf.Editor.VirtualBlockRef || 1 > len(virtualBlockRefKeywords) { + if !Conf.Editor.VirtualBlockRef { return false } @@ -43,8 +194,18 @@ func processVirtualRef(n *ast.Node, unlinks *[]*ast.Node, virtualBlockRefKeyword return false } + if 1 > len(virtualBlockRefKeywords) { + return false + } + content := string(n.Tokens) - newContent := markReplaceSpanWithSplit(content, virtualBlockRefKeywords, getMarkSpanStart(virtualBlockRefDataType), getMarkSpanEnd()) + tmp := gulu.Str.RemoveInvisible(content) + tmp = strings.TrimSpace(tmp) + if "" == tmp { + return false + } + + newContent := markReplaceSpanWithSplit(content, virtualBlockRefKeywords, search.GetMarkSpanStart(search.VirtualBlockRefDataType), search.GetMarkSpanEnd()) if content != newContent { // 虚拟引用排除命中自身块命名和别名的情况 https://github.com/siyuan-note/siyuan/issues/3185 var blockKeys []string @@ -55,7 +216,7 @@ func processVirtualRef(n *ast.Node, unlinks *[]*ast.Node, virtualBlockRefKeyword blockKeys = append(blockKeys, alias) } if 0 < len(blockKeys) { - keys := gulu.Str.SubstringsBetween(newContent, getMarkSpanStart(virtualBlockRefDataType), getMarkSpanEnd()) + keys := gulu.Str.SubstringsBetween(newContent, search.GetMarkSpanStart(search.VirtualBlockRefDataType), search.GetMarkSpanEnd()) for _, k := range keys { if gulu.Str.Contains(k, blockKeys) { return true @@ -83,7 +244,10 @@ func getVirtualRefKeywords(docName string) (ret []string) { return } - ret = sql.QueryVirtualRefKeywords(Conf.Search.VirtualRefName, Conf.Search.VirtualRefAlias, Conf.Search.VirtualRefAnchor, Conf.Search.VirtualRefDoc) + if val, ok := virtualBlockRefCache.Get("virtual_ref"); ok { + ret = val.([]string) + } + if "" != strings.TrimSpace(Conf.Editor.VirtualBlockRefInclude) { include := strings.ReplaceAll(Conf.Editor.VirtualBlockRefInclude, "\\,", "__comma@sep__") includes := strings.Split(include, ",") @@ -129,6 +293,7 @@ func getVirtualRefKeywords(docName string) (ret []string) { ret = gulu.Str.ExcludeElem(ret, []string{docName}) ret = prepareMarkKeywords(ret) + // 在 设置 - 搜索 中分别增加虚拟引用和反链提及 `关键字数量限制` https://github.com/siyuan-note/siyuan/issues/6603 if Conf.Search.VirtualRefKeywordsLimit < len(ret) { ret = ret[:Conf.Search.VirtualRefKeywordsLimit] }