From 1fa674e4303610e30aa93fb48b14fa72fc6e3fb9 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Tue, 27 Jan 2026 22:58:56 +0800 Subject: [PATCH 1/3] :art: Supports cleaning up unreferenced databases https://github.com/siyuan-note/siyuan/issues/11569 Signed-off-by: Daniel <845765@qq.com> --- kernel/api/av.go | 19 ++++ kernel/api/router.go | 1 + kernel/model/attribute_view.go | 96 +++++++++++++++++ kernel/model/tree.go | 92 +--------------- kernel/search/find.go | 192 +++++++++++++++++++++++++++++++++ kernel/util/misc.go | 9 ++ 6 files changed, 319 insertions(+), 90 deletions(-) create mode 100644 kernel/search/find.go diff --git a/kernel/api/av.go b/kernel/api/av.go index d9a20c275..68ef65702 100644 --- a/kernel/api/av.go +++ b/kernel/api/av.go @@ -17,6 +17,7 @@ package api import ( + "fmt" "net/http" "github.com/88250/gulu" @@ -27,6 +28,24 @@ import ( "github.com/siyuan-note/siyuan/kernel/util" ) +func getUnusedAttributeViews(c *gin.Context) { + ret := gulu.Ret.NewResult() + defer c.JSON(http.StatusOK, ret) + + unusedAttributeViews := model.UnusedAttributeViews() + total := len(unusedAttributeViews) + + const maxUnusedAttributeViews = 512 + if total > maxUnusedAttributeViews { + unusedAttributeViews = unusedAttributeViews[:maxUnusedAttributeViews] + util.PushMsg(fmt.Sprintf(model.Conf.Language(251), total, maxUnusedAttributeViews), 5000) + } + + ret.Data = map[string]interface{}{ + "unusedAttributeViews": unusedAttributeViews, + } +} + func getAttributeViewItemIDsByBoundIDs(c *gin.Context) { ret := gulu.Ret.NewResult() defer c.JSON(http.StatusOK, ret) diff --git a/kernel/api/router.go b/kernel/api/router.go index 2b03ea394..0865f08d9 100644 --- a/kernel/api/router.go +++ b/kernel/api/router.go @@ -489,6 +489,7 @@ func ServeAPI(ginServer *gin.Engine) { ginServer.Handle("POST", "/api/av/getAttributeViewAddingBlockDefaultValues", model.CheckAuth, getAttributeViewAddingBlockDefaultValues) ginServer.Handle("POST", 
"/api/av/getAttributeViewBoundBlockIDsByItemIDs", model.CheckAuth, getAttributeViewBoundBlockIDsByItemIDs) ginServer.Handle("POST", "/api/av/getAttributeViewItemIDsByBoundIDs", model.CheckAuth, getAttributeViewItemIDsByBoundIDs) + ginServer.Handle("POST", "/api/av/getUnusedAttributeViews", model.CheckAuth, getUnusedAttributeViews) ginServer.Handle("POST", "/api/ai/chatGPT", model.CheckAuth, model.CheckAdminRole, chatGPT) ginServer.Handle("POST", "/api/ai/chatGPTWithAction", model.CheckAuth, model.CheckAdminRole, chatGPTWithAction) diff --git a/kernel/model/attribute_view.go b/kernel/model/attribute_view.go index dd8290a60..1f9066a8e 100644 --- a/kernel/model/attribute_view.go +++ b/kernel/model/attribute_view.go @@ -38,12 +38,108 @@ import ( "github.com/siyuan-note/siyuan/kernel/av" "github.com/siyuan-note/siyuan/kernel/cache" "github.com/siyuan-note/siyuan/kernel/filesys" + "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/treenode" "github.com/siyuan-note/siyuan/kernel/util" "github.com/xrash/smetrics" ) +func UnusedAttributeViews() (ret []string) { + defer logging.Recover() + ret = []string{} + + allAvIDs, err := getAllAvIDs() + if err != nil { + return + } + + referencedAvIDs := map[string]bool{} + luteEngine := util.NewLute() + boxes := Conf.GetBoxes() + for _, box := range boxes { + pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32) + for _, paths := range pages { + var trees []*parse.Tree + for _, localPath := range paths { + tree, loadTreeErr := loadTree(localPath, luteEngine) + if nil != loadTreeErr { + continue + } + trees = append(trees, tree) + } + for _, tree := range trees { + for _, id := range getAvIDs(tree, allAvIDs) { + referencedAvIDs[id] = true + } + } + } + } + + templateAvIDs := search.FindAllMatchedTargets(filepath.Join(util.DataDir, "templates"), allAvIDs) + for _, id := range templateAvIDs { + referencedAvIDs[id] = true + } + + for _, id := range allAvIDs 
{ + if !referencedAvIDs[id] { + ret = append(ret, id) + } + } + + ret = gulu.Str.RemoveDuplicatedElem(ret) + return +} + +func getAvIDs(tree *parse.Tree, allAvIDs []string) (ret []string) { + ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + + if ast.NodeAttributeView == n.Type { + ret = append(ret, n.AttributeViewID) + } + + for _, kv := range n.KramdownIAL { + ids := util.GetContainsSubStrs(kv[1], allAvIDs) + if 0 < len(ids) { + ret = append(ret, ids...) + } + } + + return ast.WalkContinue + }) + + ret = gulu.Str.RemoveDuplicatedElem(ret) + return +} + +func getAllAvIDs() (ret []string, err error) { + ret = []string{} + + entries, err := os.ReadDir(filepath.Join(util.DataDir, "storage", "av")) + if nil != err { + return + } + + for _, entry := range entries { + name := entry.Name() + if !strings.HasSuffix(name, ".json") { + continue + } + + id := strings.TrimSuffix(name, ".json") + if !ast.IsNodeIDPattern(id) { + continue + } + + ret = append(ret, id) + } + ret = gulu.Str.RemoveDuplicatedElem(ret) + return +} + func GetAttributeViewItemIDs(avID string, blockIDs []string) (ret map[string]string) { ret = map[string]string{} for _, blockID := range blockIDs { diff --git a/kernel/model/tree.go b/kernel/model/tree.go index 06bce454b..9b69cfa32 100644 --- a/kernel/model/tree.go +++ b/kernel/model/tree.go @@ -17,17 +17,13 @@ package model import ( - "bufio" - "bytes" "errors" "fmt" "io/fs" "os" "path" "path/filepath" - "runtime" "strings" - "sync" "time" "github.com/88250/lute" @@ -38,6 +34,7 @@ import ( "github.com/siyuan-note/logging" "github.com/siyuan-note/siyuan/kernel/av" "github.com/siyuan-note/siyuan/kernel/filesys" + "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/task" "github.com/siyuan-note/siyuan/kernel/treenode" @@ -317,7 +314,7 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) { boxes := 
Conf.GetBoxes() for _, box := range boxes { root := filepath.Join(util.DataDir, box.ID) - paths := findAllOccurrences(root, id) + paths := search.FindAllMatchedPaths(root, []string{id}) var rootIDs []string rootIDPaths := map[string]string{} for _, p := range paths { @@ -335,88 +332,3 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) { } return } - -func findAllOccurrences(root string, target string) []string { - if root == "" || target == "" { - return nil - } - - searchBytes := []byte(target) - jobs := make(chan string, 256) // 任务通道 - results := make(chan string, 256) // 结果通道 - - // 用于等待所有 Worker 完成 - var wg sync.WaitGroup - // 用于等待结果收集器完成 - var collectWg sync.WaitGroup - - // 1. 启动结果收集协程 - var matchedPaths []string - collectWg.Add(1) - go func() { - defer collectWg.Done() - for path := range results { - matchedPaths = append(matchedPaths, path) - } - }() - - // 2. 启动并发 Worker Pool (基于 CPU 核心数) - numWorkers := runtime.NumCPU() - for i := 0; i < numWorkers; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for path := range jobs { - if containsTarget(path, searchBytes) { - results <- path - } - } - }() - } - - // 3. 遍历文件夹并分发任务 - _ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { - if err == nil && d.Type().IsRegular() { - jobs <- path - } - return nil - }) - - // 4. 
关闭通道并等待结束 - close(jobs) // 停止分发任务 - wg.Wait() // 等待所有 Worker 处理完 - close(results) // 停止收集结果 - collectWg.Wait() // 等待切片组装完成 - - return matchedPaths -} - -// containsTarget 针对大文件优化的字节流匹配函数 -func containsTarget(path string, target []byte) bool { - f, err := os.Open(path) - if err != nil { - return false - } - defer f.Close() - - // 1MB 缓冲区 - reader := bufio.NewReaderSize(f, 1024*1024) - for { - // 使用 ReadSlice 实现零拷贝读取 - line, err := reader.ReadSlice('\n') - if len(line) > 0 && bytes.Contains(line, target) { - return true - } - if err != nil { - if err == bufio.ErrBufferFull { - // 处理超过 1MB 的超长行,直接跳过当前行剩余部分 - for err == bufio.ErrBufferFull { - _, err = reader.ReadSlice('\n') - } - continue - } - break // EOF 或其他错误 - } - } - return false -} diff --git a/kernel/search/find.go b/kernel/search/find.go new file mode 100644 index 000000000..41ae54740 --- /dev/null +++ b/kernel/search/find.go @@ -0,0 +1,192 @@ +// SiYuan - Refactor your thinking +// Copyright (c) 2020-present, b3log.org +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +package search + +import ( + "bufio" + "bytes" + "os" + "path/filepath" + "runtime" + "sync" +) + +type Match struct { + Path string + Target string +} + +func FindAllMatchedPaths(root string, targets []string) []string { + matches := FindAllMatches(root, targets) + return pathsFromMatches(matches) +} + +func FindAllMatchedTargets(root string, targets []string) []string { + matches := FindAllMatches(root, targets) + return targetsFromMatches(matches) +} + +// FindAllMatches 遍历 root 下的文件,返回所有命中的结果(文件路径 + 命中目标) +// targets 为空或 root 为空时返回 nil +func FindAllMatches(root string, targets []string) []Match { + if root == "" || len(targets) == 0 { + return nil + } + + var searchBytes [][]byte + for _, t := range targets { + if t != "" { + searchBytes = append(searchBytes, []byte(t)) + } + } + if len(searchBytes) == 0 { + return nil + } + + jobs := make(chan string, 256) + results := make(chan Match, 256) + + var wg sync.WaitGroup + var collectWg sync.WaitGroup + + var matches []Match + collectWg.Add(1) + go func() { + defer collectWg.Done() + for m := range results { + matches = append(matches, m) + } + }() + + numWorkers := runtime.NumCPU() + for i := 0; i < numWorkers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for p := range jobs { + hits := scanFileForTargets(p, searchBytes) + if len(hits) > 0 { + for _, t := range hits { + results <- Match{Path: p, Target: t} + } + } + } + }() + } + + _ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { + if err == nil && d.Type().IsRegular() { + jobs <- path + } + return nil + }) + + close(jobs) + wg.Wait() + close(results) + collectWg.Wait() + return matches +} + +// scanFileForTargets 在文件中搜索所有目标,返回去重后的命中目标字符串列表 +func scanFileForTargets(path string, targets [][]byte) []string { + f, err := os.Open(path) + if err != nil { + return nil + } + defer f.Close() + + reader := bufio.NewReaderSize(f, 1024*1024) // 1MB 缓冲 + found := make(map[string]struct{}) + remaining := len(targets) + + for { + line, 
err := reader.ReadSlice('\n') + if len(line) > 0 { + for _, t := range targets { + ts := string(t) + if _, ok := found[ts]; ok { + continue + } + if bytes.Contains(line, t) { + found[ts] = struct{}{} + remaining-- + if remaining == 0 { + // 找到所有目标,提前返回 + res := make([]string, 0, len(found)) + for k := range found { + res = append(res, k) + } + return res + } + } + } + } + if err != nil { + if err == bufio.ErrBufferFull { + for err == bufio.ErrBufferFull { + _, err = reader.ReadSlice('\n') + } + continue + } + break + } + } + + if len(found) == 0 { + return nil + } + res := make([]string, 0, len(found)) + for k := range found { + res = append(res, k) + } + return res +} + +// pathsFromMatches 从 Match 列表中返回去重的路径切片(保留首次出现顺序) +func pathsFromMatches(ms []Match) []string { + if len(ms) == 0 { + return nil + } + seen := make(map[string]struct{}) + paths := make([]string, 0) + for _, m := range ms { + if _, ok := seen[m.Path]; ok { + continue + } + seen[m.Path] = struct{}{} + paths = append(paths, m.Path) + } + return paths +} + +// targetsFromMatches 从 Match 列表中返回去重的目标切片(保留首次出现顺序) +func targetsFromMatches(ms []Match) []string { + if len(ms) == 0 { + return nil + } + seen := make(map[string]struct{}) + targets := make([]string, 0) + for _, m := range ms { + if _, ok := seen[m.Target]; ok { + continue + } + seen[m.Target] = struct{}{} + targets = append(targets, m.Target) + } + return targets +} diff --git a/kernel/util/misc.go b/kernel/util/misc.go index ddefcb08e..63151eb15 100644 --- a/kernel/util/misc.go +++ b/kernel/util/misc.go @@ -207,6 +207,15 @@ func ContainsSubStr(s string, subStrs []string) bool { return false } +func GetContainsSubStrs(s string, subStrs []string) (ret []string) { + for _, v := range subStrs { + if strings.Contains(s, v) { + ret = append(ret, v) + } + } + return +} + func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) { if old == new { return strs, false From 34e3261bce5960609f85ab49efffdc38d3b93c3d Mon Sep 17 00:00:00 
2001 From: Daniel <845765@qq.com> Date: Wed, 28 Jan 2026 10:17:12 +0800 Subject: [PATCH 2/3] :art: https://github.com/siyuan-note/siyuan/issues/16931#issuecomment-3808557014 Signed-off-by: Daniel <845765@qq.com> --- kernel/api/outline.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/api/outline.go b/kernel/api/outline.go index 83c35c6e3..5530720ee 100644 --- a/kernel/api/outline.go +++ b/kernel/api/outline.go @@ -44,6 +44,10 @@ func getDocOutline(c *gin.Context) { } rootID := arg["id"].(string) + if util.InvalidIDPattern(rootID, ret) { + return + } + headings, err := model.Outline(rootID, preview) if err != nil { ret.Code = 1 From 1aa84bd749c13979763fba3e28349b52cbccb853 Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Wed, 28 Jan 2026 10:44:42 +0800 Subject: [PATCH 3/3] :zap: Improve find performance Signed-off-by: Daniel <845765@qq.com> --- kernel/search/find.go | 93 +++++++++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/kernel/search/find.go b/kernel/search/find.go index 41ae54740..104889e79 100644 --- a/kernel/search/find.go +++ b/kernel/search/find.go @@ -17,8 +17,8 @@ package search import ( - "bufio" "bytes" + "io" "os" "path/filepath" "runtime" @@ -47,13 +47,20 @@ func FindAllMatches(root string, targets []string) []Match { return nil } - var searchBytes [][]byte + // 构建基于首字节的模式索引,并计算最长模式长度 + patternIndex := make(map[byte][][]byte) + var maxLen int for _, t := range targets { - if t != "" { - searchBytes = append(searchBytes, []byte(t)) + if t == "" { + continue } + b := []byte(t) + if len(b) > maxLen { + maxLen = len(b) + } + patternIndex[b[0]] = append(patternIndex[b[0]], b) } - if len(searchBytes) == 0 { + if len(patternIndex) == 0 { return nil } @@ -78,7 +85,7 @@ func FindAllMatches(root string, targets []string) []Match { go func() { defer wg.Done() for p := range jobs { - hits := scanFileForTargets(p, searchBytes) + hits := scanFileForTargets(p, patternIndex, maxLen) if 
len(hits) > 0 { for _, t := range hits { results <- Match{Path: p, Target: t} @@ -102,47 +109,73 @@ func FindAllMatches(root string, targets []string) []Match { return matches } -// scanFileForTargets 在文件中搜索所有目标,返回去重后的命中目标字符串列表 -func scanFileForTargets(path string, targets [][]byte) []string { +// scanFileForTargets 在文件中流式搜索所有目标(基于首字节索引),返回去重后的命中目标字符串列表 +func scanFileForTargets(path string, patternIndex map[byte][][]byte, maxLen int) []string { f, err := os.Open(path) if err != nil { return nil } defer f.Close() - reader := bufio.NewReaderSize(f, 1024*1024) // 1MB 缓冲 + // 构建字节位图,加速首字节检测 + var bitmap [256]bool + for b := range patternIndex { + bitmap[b] = true + } + found := make(map[string]struct{}) - remaining := len(targets) + buf := make([]byte, 64<<10) // 64KB + // 保留上一次块末尾的重叠数据以支持跨块匹配 + var tail []byte for { - line, err := reader.ReadSlice('\n') - if len(line) > 0 { - for _, t := range targets { - ts := string(t) - if _, ok := found[ts]; ok { - continue + n, err := f.Read(buf) + if n > 0 { + // data = tail + buf[:n] + data := make([]byte, len(tail)+n) + copy(data, tail) + copy(data[len(tail):], buf[:n]) + + // 扫描 data,查找任意候选首字节位置 + i := 0 + for i < len(data) { + // 快速跳过非候选字节 + for i < len(data) && !bitmap[data[i]] { + i++ } - if bytes.Contains(line, t) { - found[ts] = struct{}{} - remaining-- - if remaining == 0 { - // 找到所有目标,提前返回 - res := make([]string, 0, len(found)) - for k := range found { - res = append(res, k) + if i >= len(data) { + break + } + b := data[i] + // 对应首字节的所有模式进行校验 + for _, pat := range patternIndex[b] { + pl := len(pat) + // 如果剩余字节不足以完全匹配,则交由下一轮(通过 tail 保证) + if i+pl <= len(data) { + if bytes.Equal(pat, data[i:i+pl]) { + found[string(pat)] = struct{}{} } - return res } } + i++ + } + + // 保留最后 maxLen-1 字节作为下一块的 tail(避免超长内存分配) + if maxLen <= 1 { + tail = nil + } else { + if len(data) >= maxLen-1 { + tail = append(tail[:0], data[len(data)-(maxLen-1):]...) + } else { + tail = append(tail[:0], data...) 
+ } } } if err != nil { - if err == bufio.ErrBufferFull { - for err == bufio.ErrBufferFull { - _, err = reader.ReadSlice('\n') - } - continue + if err == io.EOF { + break } + // 读取出错,返回已有结果 break } }