diff --git a/kernel/api/av.go b/kernel/api/av.go
index d9a20c275..68ef65702 100644
--- a/kernel/api/av.go
+++ b/kernel/api/av.go
@@ -17,6 +17,7 @@
 package api
 
 import (
+	"fmt"
 	"net/http"
 
 	"github.com/88250/gulu"
@@ -27,6 +28,24 @@ import (
 	"github.com/siyuan-note/siyuan/kernel/util"
 )
 
+func getUnusedAttributeViews(c *gin.Context) {
+	ret := gulu.Ret.NewResult()
+	defer c.JSON(http.StatusOK, ret)
+
+	unusedAttributeViews := model.UnusedAttributeViews()
+	total := len(unusedAttributeViews)
+
+	const maxUnusedAttributeViews = 512
+	if total > maxUnusedAttributeViews {
+		unusedAttributeViews = unusedAttributeViews[:maxUnusedAttributeViews]
+		util.PushMsg(fmt.Sprintf(model.Conf.Language(251), total, maxUnusedAttributeViews), 5000)
+	}
+
+	ret.Data = map[string]interface{}{
+		"unusedAttributeViews": unusedAttributeViews,
+	}
+}
+
 func getAttributeViewItemIDsByBoundIDs(c *gin.Context) {
 	ret := gulu.Ret.NewResult()
 	defer c.JSON(http.StatusOK, ret)
diff --git a/kernel/api/outline.go b/kernel/api/outline.go
index 83c35c6e3..5530720ee 100644
--- a/kernel/api/outline.go
+++ b/kernel/api/outline.go
@@ -44,6 +44,10 @@ func getDocOutline(c *gin.Context) {
 	}
 
 	rootID := arg["id"].(string)
+	if util.InvalidIDPattern(rootID, ret) {
+		return
+	}
+
 	headings, err := model.Outline(rootID, preview)
 	if err != nil {
 		ret.Code = 1
diff --git a/kernel/api/router.go b/kernel/api/router.go
index 2b03ea394..0865f08d9 100644
--- a/kernel/api/router.go
+++ b/kernel/api/router.go
@@ -489,6 +489,7 @@ func ServeAPI(ginServer *gin.Engine) {
 	ginServer.Handle("POST", "/api/av/getAttributeViewAddingBlockDefaultValues", model.CheckAuth, getAttributeViewAddingBlockDefaultValues)
 	ginServer.Handle("POST", "/api/av/getAttributeViewBoundBlockIDsByItemIDs", model.CheckAuth, getAttributeViewBoundBlockIDsByItemIDs)
 	ginServer.Handle("POST", "/api/av/getAttributeViewItemIDsByBoundIDs", model.CheckAuth, getAttributeViewItemIDsByBoundIDs)
+	ginServer.Handle("POST", "/api/av/getUnusedAttributeViews", model.CheckAuth, getUnusedAttributeViews)
 
 	ginServer.Handle("POST", "/api/ai/chatGPT", model.CheckAuth, model.CheckAdminRole, chatGPT)
 	ginServer.Handle("POST", "/api/ai/chatGPTWithAction", model.CheckAuth, model.CheckAdminRole, chatGPTWithAction)
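Note: the new /api/av/getUnusedAttributeViews endpoint takes no parameters and returns the orphaned attribute view IDs, capped at 512 with a push message when the cap is hit. A minimal client-side sketch, assuming the kernel listens on 127.0.0.1:6806 and token-based API auth; the response field name follows the handler above, the token value is a placeholder:

// Hypothetical caller of the new endpoint; not part of this change.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	req, _ := http.NewRequest("POST", "http://127.0.0.1:6806/api/av/getUnusedAttributeViews", strings.NewReader("{}"))
	req.Header.Set("Authorization", "Token your-api-token") // placeholder token
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var ret struct {
		Code int    `json:"code"`
		Msg  string `json:"msg"`
		Data struct {
			UnusedAttributeViews []string `json:"unusedAttributeViews"`
		} `json:"data"`
	}
	_ = json.NewDecoder(resp.Body).Decode(&ret)
	fmt.Println(ret.Data.UnusedAttributeViews) // at most 512 IDs; a push message reports the overflow
}
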
diff --git a/kernel/model/attribute_view.go b/kernel/model/attribute_view.go
index dd8290a60..1f9066a8e 100644
--- a/kernel/model/attribute_view.go
+++ b/kernel/model/attribute_view.go
@@ -38,12 +38,108 @@ import (
 	"github.com/siyuan-note/siyuan/kernel/av"
 	"github.com/siyuan-note/siyuan/kernel/cache"
 	"github.com/siyuan-note/siyuan/kernel/filesys"
+	"github.com/siyuan-note/siyuan/kernel/search"
 	"github.com/siyuan-note/siyuan/kernel/sql"
 	"github.com/siyuan-note/siyuan/kernel/treenode"
 	"github.com/siyuan-note/siyuan/kernel/util"
 	"github.com/xrash/smetrics"
 )
 
+func UnusedAttributeViews() (ret []string) {
+	defer logging.Recover()
+	ret = []string{}
+
+	allAvIDs, err := getAllAvIDs()
+	if err != nil {
+		return
+	}
+
+	referencedAvIDs := map[string]bool{}
+	luteEngine := util.NewLute()
+	boxes := Conf.GetBoxes()
+	for _, box := range boxes {
+		pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32)
+		for _, paths := range pages {
+			var trees []*parse.Tree
+			for _, localPath := range paths {
+				tree, loadTreeErr := loadTree(localPath, luteEngine)
+				if nil != loadTreeErr {
+					continue
+				}
+				trees = append(trees, tree)
+			}
+			for _, tree := range trees {
+				for _, id := range getAvIDs(tree, allAvIDs) {
+					referencedAvIDs[id] = true
+				}
+			}
+		}
+	}
+
+	templateAvIDs := search.FindAllMatchedTargets(filepath.Join(util.DataDir, "templates"), allAvIDs)
+	for _, id := range templateAvIDs {
+		referencedAvIDs[id] = true
+	}
+
+	for _, id := range allAvIDs {
+		if !referencedAvIDs[id] {
+			ret = append(ret, id)
+		}
+	}
+
+	ret = gulu.Str.RemoveDuplicatedElem(ret)
+	return
+}
+
+func getAvIDs(tree *parse.Tree, allAvIDs []string) (ret []string) {
+	ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
+		if !entering {
+			return ast.WalkContinue
+		}
+
+		if ast.NodeAttributeView == n.Type {
+			ret = append(ret, n.AttributeViewID)
+		}
+
+		for _, kv := range n.KramdownIAL {
+			ids := util.GetContainsSubStrs(kv[1], allAvIDs)
+			if 0 < len(ids) {
+				ret = append(ret, ids...)
+			}
+		}
+
+		return ast.WalkContinue
+	})
+
+	ret = gulu.Str.RemoveDuplicatedElem(ret)
+	return
+}
+
+func getAllAvIDs() (ret []string, err error) {
+	ret = []string{}
+
+	entries, err := os.ReadDir(filepath.Join(util.DataDir, "storage", "av"))
+	if nil != err {
+		return
+	}
+
+	for _, entry := range entries {
+		name := entry.Name()
+		if !strings.HasSuffix(name, ".json") {
+			continue
+		}
+
+		id := strings.TrimSuffix(name, ".json")
+		if !ast.IsNodeIDPattern(id) {
+			continue
+		}
+
+		ret = append(ret, id)
+	}
+	ret = gulu.Str.RemoveDuplicatedElem(ret)
+	return
+}
+
 func GetAttributeViewItemIDs(avID string, blockIDs []string) (ret map[string]string) {
 	ret = map[string]string{}
 	for _, blockID := range blockIDs {
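Note: UnusedAttributeViews only reports IDs; since getAllAvIDs derives them from data/storage/av/*.json, a caller can map each ID back to its file. A minimal sketch assuming package model context (logUnusedAttributeViews is a hypothetical helper, not part of this change):

package model

import (
	"path/filepath"

	"github.com/siyuan-note/logging"
	"github.com/siyuan-note/siyuan/kernel/util"
)

// logUnusedAttributeViews shows how the returned IDs map back to the
// attribute view files that getAllAvIDs scanned.
func logUnusedAttributeViews() {
	for _, avID := range UnusedAttributeViews() {
		p := filepath.Join(util.DataDir, "storage", "av", avID+".json")
		logging.LogInfof("unused attribute view: %s", p)
	}
}
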
diff --git a/kernel/model/tree.go b/kernel/model/tree.go
index 06bce454b..9b69cfa32 100644
--- a/kernel/model/tree.go
+++ b/kernel/model/tree.go
@@ -17,17 +17,13 @@
 package model
 
 import (
-	"bufio"
-	"bytes"
 	"errors"
 	"fmt"
 	"io/fs"
 	"os"
 	"path"
 	"path/filepath"
-	"runtime"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/88250/lute"
@@ -38,6 +34,7 @@ import (
 	"github.com/siyuan-note/logging"
 	"github.com/siyuan-note/siyuan/kernel/av"
 	"github.com/siyuan-note/siyuan/kernel/filesys"
+	"github.com/siyuan-note/siyuan/kernel/search"
 	"github.com/siyuan-note/siyuan/kernel/sql"
 	"github.com/siyuan-note/siyuan/kernel/task"
 	"github.com/siyuan-note/siyuan/kernel/treenode"
@@ -317,7 +314,7 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) {
 	boxes := Conf.GetBoxes()
 	for _, box := range boxes {
 		root := filepath.Join(util.DataDir, box.ID)
-		paths := findAllOccurrences(root, id)
+		paths := search.FindAllMatchedPaths(root, []string{id})
 		var rootIDs []string
 		rootIDPaths := map[string]string{}
 		for _, p := range paths {
@@ -335,88 +332,3 @@
 	}
 	return
 }
-
-func findAllOccurrences(root string, target string) []string {
-	if root == "" || target == "" {
-		return nil
-	}
-
-	searchBytes := []byte(target)
-	jobs := make(chan string, 256)    // job channel
-	results := make(chan string, 256) // result channel
-
-	// Waits for all workers to finish
-	var wg sync.WaitGroup
-	// Waits for the result collector to finish
-	var collectWg sync.WaitGroup
-
-	// 1. Start the result-collecting goroutine
-	var matchedPaths []string
-	collectWg.Add(1)
-	go func() {
-		defer collectWg.Done()
-		for path := range results {
-			matchedPaths = append(matchedPaths, path)
-		}
-	}()
-
-	// 2. Start the concurrent worker pool (sized by CPU core count)
-	numWorkers := runtime.NumCPU()
-	for i := 0; i < numWorkers; i++ {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			for path := range jobs {
-				if containsTarget(path, searchBytes) {
-					results <- path
-				}
-			}
-		}()
-	}
-
-	// 3. Walk the folder and dispatch jobs
-	_ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
-		if err == nil && d.Type().IsRegular() {
-			jobs <- path
-		}
-		return nil
-	})
-
-	// 4. Close the channels and wait for completion
-	close(jobs)      // stop dispatching jobs
-	wg.Wait()        // wait for all workers to finish processing
-	close(results)   // stop collecting results
-	collectWg.Wait() // wait for the result slice to be assembled
-
-	return matchedPaths
-}
-
-// containsTarget is a byte-stream matcher optimized for large files
-func containsTarget(path string, target []byte) bool {
-	f, err := os.Open(path)
-	if err != nil {
-		return false
-	}
-	defer f.Close()
-
-	// 1MB buffer
-	reader := bufio.NewReaderSize(f, 1024*1024)
-	for {
-		// Use ReadSlice for zero-copy reads
-		line, err := reader.ReadSlice('\n')
-		if len(line) > 0 && bytes.Contains(line, target) {
-			return true
-		}
-		if err != nil {
-			if err == bufio.ErrBufferFull {
-				// Lines longer than 1MB: skip the rest of the current line
-				for err == bufio.ErrBufferFull {
-					_, err = reader.ReadSlice('\n')
-				}
-				continue
-			}
-			break // EOF or other error
-		}
-	}
-	return false
-}
diff --git a/kernel/search/find.go b/kernel/search/find.go
new file mode 100644
index 000000000..104889e79
--- /dev/null
+++ b/kernel/search/find.go
@@ -0,0 +1,225 @@
+// SiYuan - Refactor your thinking
+// Copyright (c) 2020-present, b3log.org
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+package search
+
+import (
+	"bytes"
+	"io"
+	"os"
+	"path/filepath"
+	"runtime"
+	"sync"
+)
+
+type Match struct {
+	Path   string
+	Target string
+}
+
+func FindAllMatchedPaths(root string, targets []string) []string {
+	matches := FindAllMatches(root, targets)
+	return pathsFromMatches(matches)
+}
+
+func FindAllMatchedTargets(root string, targets []string) []string {
+	matches := FindAllMatches(root, targets)
+	return targetsFromMatches(matches)
+}
+
+// FindAllMatches walks the files under root and returns every hit (file path + matched target).
+// It returns nil when root or targets is empty.
+func FindAllMatches(root string, targets []string) []Match {
+	if root == "" || len(targets) == 0 {
+		return nil
+	}
+
+	// Build a pattern index keyed by first byte and track the longest pattern length
+	patternIndex := make(map[byte][][]byte)
+	var maxLen int
+	for _, t := range targets {
+		if t == "" {
+			continue
+		}
+		b := []byte(t)
+		if len(b) > maxLen {
+			maxLen = len(b)
+		}
+		patternIndex[b[0]] = append(patternIndex[b[0]], b)
+	}
+	if len(patternIndex) == 0 {
+		return nil
+	}
+
+	jobs := make(chan string, 256)
+	results := make(chan Match, 256)
+
+	var wg sync.WaitGroup
+	var collectWg sync.WaitGroup
+
+	var matches []Match
+	collectWg.Add(1)
+	go func() {
+		defer collectWg.Done()
+		for m := range results {
+			matches = append(matches, m)
+		}
+	}()
+
+	numWorkers := runtime.NumCPU()
+	for i := 0; i < numWorkers; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for p := range jobs {
+				hits := scanFileForTargets(p, patternIndex, maxLen)
+				if len(hits) > 0 {
+					for _, t := range hits {
+						results <- Match{Path: p, Target: t}
+					}
+				}
+			}
+		}()
+	}
+
+	_ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
+		if err == nil && d.Type().IsRegular() {
+			jobs <- path
+		}
+		return nil
+	})
+
+	close(jobs)
+	wg.Wait()
+	close(results)
+	collectWg.Wait()
+	return matches
+}
+
+// scanFileForTargets streams through the file searching for all targets (via the first-byte index) and returns the deduplicated list of matched target strings.
+func scanFileForTargets(path string, patternIndex map[byte][][]byte, maxLen int) []string {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil
+	}
+	defer f.Close()
+
+	// Build a byte bitmap to speed up first-byte detection
+	var bitmap [256]bool
+	for b := range patternIndex {
+		bitmap[b] = true
+	}
+
+	found := make(map[string]struct{})
+	buf := make([]byte, 64<<10) // 64KB
+	// Keep the overlapping bytes from the end of the previous chunk to support cross-chunk matches
+	var tail []byte
+
+	for {
+		n, err := f.Read(buf)
+		if n > 0 {
+			// data = tail + buf[:n]
+			data := make([]byte, len(tail)+n)
+			copy(data, tail)
+			copy(data[len(tail):], buf[:n])
+
+			// Scan data for positions of any candidate first byte
+			i := 0
+			for i < len(data) {
+				// Quickly skip non-candidate bytes
+				for i < len(data) && !bitmap[data[i]] {
+					i++
+				}
+				if i >= len(data) {
+					break
+				}
+				b := data[i]
+				// Verify every pattern that starts with this byte
+				for _, pat := range patternIndex[b] {
+					pl := len(pat)
+					// If the remaining bytes cannot hold a full match, defer to the next round (guaranteed by tail)
+					if i+pl <= len(data) {
+						if bytes.Equal(pat, data[i:i+pl]) {
+							found[string(pat)] = struct{}{}
+						}
+					}
+				}
+				i++
+			}
+
+			// Keep the last maxLen-1 bytes as the tail for the next chunk (avoids oversized allocations)
+			if maxLen <= 1 {
+				tail = nil
+			} else {
+				if len(data) >= maxLen-1 {
+					tail = append(tail[:0], data[len(data)-(maxLen-1):]...)
+				} else {
+					tail = append(tail[:0], data...)
+				}
+			}
+		}
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			// Read error: return what has been found so far
+			break
+		}
+	}
+
+	if len(found) == 0 {
+		return nil
+	}
+	res := make([]string, 0, len(found))
+	for k := range found {
+		res = append(res, k)
+	}
+	return res
+}
+
+// pathsFromMatches returns the deduplicated paths from the matches (preserving first-seen order)
+func pathsFromMatches(ms []Match) []string {
+	if len(ms) == 0 {
+		return nil
+	}
+	seen := make(map[string]struct{})
+	paths := make([]string, 0)
+	for _, m := range ms {
+		if _, ok := seen[m.Path]; ok {
+			continue
+		}
+		seen[m.Path] = struct{}{}
+		paths = append(paths, m.Path)
+	}
+	return paths
+}
+
+// targetsFromMatches returns the deduplicated targets from the matches (preserving first-seen order)
+func targetsFromMatches(ms []Match) []string {
+	if len(ms) == 0 {
+		return nil
+	}
+	seen := make(map[string]struct{})
+	targets := make([]string, 0)
+	for _, m := range ms {
+		if _, ok := seen[m.Target]; ok {
+			continue
+		}
+		seen[m.Target] = struct{}{}
+		targets = append(targets, m.Target)
+	}
+	return targets
+}
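Note: the tail buffer in scanFileForTargets is what preserves matches that straddle the 64KB read boundary — the last maxLen-1 bytes of each chunk are re-scanned together with the next chunk. A minimal test sketch of that guarantee (hypothetical test, not part of this change; the file name and ID are made up):

package search

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

func TestCrossChunkMatch(t *testing.T) {
	dir := t.TempDir()
	id := "20240101120000-abcdefg" // looks like a SiYuan node ID, purely illustrative

	// Place the target so it straddles the 64KB chunk boundary used by scanFileForTargets.
	content := strings.Repeat("x", 64<<10-7) + id
	if err := os.WriteFile(filepath.Join(dir, "doc.sy"), []byte(content), 0644); err != nil {
		t.Fatal(err)
	}

	got := FindAllMatchedTargets(dir, []string{id})
	if len(got) != 1 || got[0] != id {
		t.Fatalf("expected cross-chunk match for %s, got %v", id, got)
	}
}
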
diff --git a/kernel/util/misc.go b/kernel/util/misc.go
index ddefcb08e..63151eb15 100644
--- a/kernel/util/misc.go
+++ b/kernel/util/misc.go
@@ -207,6 +207,15 @@ func ContainsSubStr(s string, subStrs []string) bool {
 	return false
 }
 
+func GetContainsSubStrs(s string, subStrs []string) (ret []string) {
+	for _, v := range subStrs {
+		if strings.Contains(s, v) {
+			ret = append(ret, v)
+		}
+	}
+	return
+}
+
 func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) {
 	if old == new {
 		return strs, false
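Note: GetContainsSubStrs is the multi-hit counterpart of the existing ContainsSubStr — it returns every element of subStrs found in s instead of a single boolean, which is what getAvIDs needs when one IAL value (for example a custom-avs attribute) references several databases. An illustrative call with placeholder values:

package main

import (
	"fmt"

	"github.com/siyuan-note/siyuan/kernel/util"
)

func main() {
	// Placeholder values in SiYuan's node-ID format; the attribute string is illustrative.
	ids := []string{"20240101120000-abcdefg", "20240202130000-hijklmn"}
	ial := `custom-avs="20240101120000-abcdefg"`

	fmt.Println(util.GetContainsSubStrs(ial, ids)) // [20240101120000-abcdefg]
	fmt.Println(util.ContainsSubStr(ial, ids))     // true
}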