diff --git a/kernel/api/av.go b/kernel/api/av.go index d9a20c275..68ef65702 100644 --- a/kernel/api/av.go +++ b/kernel/api/av.go @@ -17,6 +17,7 @@ package api import ( + "fmt" "net/http" "github.com/88250/gulu" @@ -27,6 +28,24 @@ import ( "github.com/siyuan-note/siyuan/kernel/util" ) +func getUnusedAttributeViews(c *gin.Context) { + ret := gulu.Ret.NewResult() + defer c.JSON(http.StatusOK, ret) + + unusedAttributeViews := model.UnusedAttributeViews() + total := len(unusedAttributeViews) + + const maxUnusedAttributeViews = 512 + if total > maxUnusedAttributeViews { + unusedAttributeViews = unusedAttributeViews[:maxUnusedAttributeViews] + util.PushMsg(fmt.Sprintf(model.Conf.Language(251), total, maxUnusedAttributeViews), 5000) + } + + ret.Data = map[string]interface{}{ + "unusedAttributeViews": unusedAttributeViews, + } +} + func getAttributeViewItemIDsByBoundIDs(c *gin.Context) { ret := gulu.Ret.NewResult() defer c.JSON(http.StatusOK, ret) diff --git a/kernel/api/router.go b/kernel/api/router.go index 2b03ea394..0865f08d9 100644 --- a/kernel/api/router.go +++ b/kernel/api/router.go @@ -489,6 +489,7 @@ func ServeAPI(ginServer *gin.Engine) { ginServer.Handle("POST", "/api/av/getAttributeViewAddingBlockDefaultValues", model.CheckAuth, getAttributeViewAddingBlockDefaultValues) ginServer.Handle("POST", "/api/av/getAttributeViewBoundBlockIDsByItemIDs", model.CheckAuth, getAttributeViewBoundBlockIDsByItemIDs) ginServer.Handle("POST", "/api/av/getAttributeViewItemIDsByBoundIDs", model.CheckAuth, getAttributeViewItemIDsByBoundIDs) + ginServer.Handle("POST", "/api/av/getUnusedAttributeViews", model.CheckAuth, getUnusedAttributeViews) ginServer.Handle("POST", "/api/ai/chatGPT", model.CheckAuth, model.CheckAdminRole, chatGPT) ginServer.Handle("POST", "/api/ai/chatGPTWithAction", model.CheckAuth, model.CheckAdminRole, chatGPTWithAction) diff --git a/kernel/model/attribute_view.go b/kernel/model/attribute_view.go index dd8290a60..1f9066a8e 100644 --- 
a/kernel/model/attribute_view.go +++ b/kernel/model/attribute_view.go @@ -38,12 +38,108 @@ import ( "github.com/siyuan-note/siyuan/kernel/av" "github.com/siyuan-note/siyuan/kernel/cache" "github.com/siyuan-note/siyuan/kernel/filesys" + "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/treenode" "github.com/siyuan-note/siyuan/kernel/util" "github.com/xrash/smetrics" ) +func UnusedAttributeViews() (ret []string) { + defer logging.Recover() + ret = []string{} + + allAvIDs, err := getAllAvIDs() + if err != nil { + return + } + + referencedAvIDs := map[string]bool{} + luteEngine := util.NewLute() + boxes := Conf.GetBoxes() + for _, box := range boxes { + pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32) + for _, paths := range pages { + var trees []*parse.Tree + for _, localPath := range paths { + tree, loadTreeErr := loadTree(localPath, luteEngine) + if nil != loadTreeErr { + continue + } + trees = append(trees, tree) + } + for _, tree := range trees { + for _, id := range getAvIDs(tree, allAvIDs) { + referencedAvIDs[id] = true + } + } + } + } + + templateAvIDs := search.FindAllMatchedTargets(filepath.Join(util.DataDir, "templates"), allAvIDs) + for _, id := range templateAvIDs { + referencedAvIDs[id] = true + } + + for _, id := range allAvIDs { + if !referencedAvIDs[id] { + ret = append(ret, id) + } + } + + ret = gulu.Str.RemoveDuplicatedElem(ret) + return +} + +func getAvIDs(tree *parse.Tree, allAvIDs []string) (ret []string) { + ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + + if ast.NodeAttributeView == n.Type { + ret = append(ret, n.AttributeViewID) + } + + for _, kv := range n.KramdownIAL { + ids := util.GetContainsSubStrs(kv[1], allAvIDs) + if 0 < len(ids) { + ret = append(ret, ids...) 
+ } + } + + return ast.WalkContinue + }) + + ret = gulu.Str.RemoveDuplicatedElem(ret) + return +} + +func getAllAvIDs() (ret []string, err error) { + ret = []string{} + + entries, err := os.ReadDir(filepath.Join(util.DataDir, "storage", "av")) + if nil != err { + return + } + + for _, entry := range entries { + name := entry.Name() + if !strings.HasSuffix(name, ".json") { + continue + } + + id := strings.TrimSuffix(name, ".json") + if !ast.IsNodeIDPattern(id) { + continue + } + + ret = append(ret, id) + } + ret = gulu.Str.RemoveDuplicatedElem(ret) + return +} + func GetAttributeViewItemIDs(avID string, blockIDs []string) (ret map[string]string) { ret = map[string]string{} for _, blockID := range blockIDs { diff --git a/kernel/model/tree.go b/kernel/model/tree.go index 06bce454b..9b69cfa32 100644 --- a/kernel/model/tree.go +++ b/kernel/model/tree.go @@ -17,17 +17,13 @@ package model import ( - "bufio" - "bytes" "errors" "fmt" "io/fs" "os" "path" "path/filepath" - "runtime" "strings" - "sync" "time" "github.com/88250/lute" @@ -38,6 +34,7 @@ import ( "github.com/siyuan-note/logging" "github.com/siyuan-note/siyuan/kernel/av" "github.com/siyuan-note/siyuan/kernel/filesys" + "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/task" "github.com/siyuan-note/siyuan/kernel/treenode" @@ -317,7 +314,7 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) { boxes := Conf.GetBoxes() for _, box := range boxes { root := filepath.Join(util.DataDir, box.ID) - paths := findAllOccurrences(root, id) + paths := search.FindAllMatchedPaths(root, []string{id}) var rootIDs []string rootIDPaths := map[string]string{} for _, p := range paths { @@ -335,88 +332,3 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) { } return } - -func findAllOccurrences(root string, target string) []string { - if root == "" || target == "" { - return nil - } - - searchBytes := []byte(target) - jobs := make(chan 
string, 256) // 任务通道 - results := make(chan string, 256) // 结果通道 - - // 用于等待所有 Worker 完成 - var wg sync.WaitGroup - // 用于等待结果收集器完成 - var collectWg sync.WaitGroup - - // 1. 启动结果收集协程 - var matchedPaths []string - collectWg.Add(1) - go func() { - defer collectWg.Done() - for path := range results { - matchedPaths = append(matchedPaths, path) - } - }() - - // 2. 启动并发 Worker Pool (基于 CPU 核心数) - numWorkers := runtime.NumCPU() - for i := 0; i < numWorkers; i++ { - wg.Add(1) - go func() { - defer wg.Done() - for path := range jobs { - if containsTarget(path, searchBytes) { - results <- path - } - } - }() - } - - // 3. 遍历文件夹并分发任务 - _ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { - if err == nil && d.Type().IsRegular() { - jobs <- path - } - return nil - }) - - // 4. 关闭通道并等待结束 - close(jobs) // 停止分发任务 - wg.Wait() // 等待所有 Worker 处理完 - close(results) // 停止收集结果 - collectWg.Wait() // 等待切片组装完成 - - return matchedPaths -} - -// containsTarget 针对大文件优化的字节流匹配函数 -func containsTarget(path string, target []byte) bool { - f, err := os.Open(path) - if err != nil { - return false - } - defer f.Close() - - // 1MB 缓冲区 - reader := bufio.NewReaderSize(f, 1024*1024) - for { - // 使用 ReadSlice 实现零拷贝读取 - line, err := reader.ReadSlice('\n') - if len(line) > 0 && bytes.Contains(line, target) { - return true - } - if err != nil { - if err == bufio.ErrBufferFull { - // 处理超过 1MB 的超长行,直接跳过当前行剩余部分 - for err == bufio.ErrBufferFull { - _, err = reader.ReadSlice('\n') - } - continue - } - break // EOF 或其他错误 - } - } - return false -} diff --git a/kernel/search/find.go b/kernel/search/find.go new file mode 100644 index 000000000..41ae54740 --- /dev/null +++ b/kernel/search/find.go @@ -0,0 +1,192 @@ +// SiYuan - Refactor your thinking +// Copyright (c) 2020-present, b3log.org +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of 
the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +package search + +import ( + "bufio" + "bytes" + "os" + "path/filepath" + "runtime" + "sync" +) + +type Match struct { + Path string + Target string +} + +func FindAllMatchedPaths(root string, targets []string) []string { + matches := FindAllMatches(root, targets) + return pathsFromMatches(matches) +} + +func FindAllMatchedTargets(root string, targets []string) []string { + matches := FindAllMatches(root, targets) + return targetsFromMatches(matches) +} + +// FindAllMatches 遍历 root 下的文件,返回所有命中的结果(文件路径 + 命中目标) +// targets 为空或 root 为空时返回 nil +func FindAllMatches(root string, targets []string) []Match { + if root == "" || len(targets) == 0 { + return nil + } + + var searchBytes [][]byte + for _, t := range targets { + if t != "" { + searchBytes = append(searchBytes, []byte(t)) + } + } + if len(searchBytes) == 0 { + return nil + } + + jobs := make(chan string, 256) + results := make(chan Match, 256) + + var wg sync.WaitGroup + var collectWg sync.WaitGroup + + var matches []Match + collectWg.Add(1) + go func() { + defer collectWg.Done() + for m := range results { + matches = append(matches, m) + } + }() + + numWorkers := runtime.NumCPU() + for i := 0; i < numWorkers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for p := range jobs { + hits := scanFileForTargets(p, searchBytes) + if len(hits) > 0 { + for _, t := range hits { + results <- Match{Path: p, Target: t} + } + } + } + }() + } + + _ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { + if err == nil && d.Type().IsRegular() { + jobs <- path + } + 
return nil + }) + + close(jobs) + wg.Wait() + close(results) + collectWg.Wait() + return matches +} + +// scanFileForTargets 在文件中搜索所有目标,返回去重后的命中目标字符串列表 +func scanFileForTargets(path string, targets [][]byte) []string { + f, err := os.Open(path) + if err != nil { + return nil + } + defer f.Close() + + reader := bufio.NewReaderSize(f, 1024*1024) // 1MB 缓冲 + found := make(map[string]struct{}) + remaining := len(targets) + + for { + line, err := reader.ReadSlice('\n') + if len(line) > 0 { + for _, t := range targets { + ts := string(t) + if _, ok := found[ts]; ok { + continue + } + if bytes.Contains(line, t) { + found[ts] = struct{}{} + remaining-- + if remaining == 0 { + // 找到所有目标,提前返回 + res := make([]string, 0, len(found)) + for k := range found { + res = append(res, k) + } + return res + } + } + } + } + if err != nil { + if err == bufio.ErrBufferFull { + for err == bufio.ErrBufferFull { + _, err = reader.ReadSlice('\n') + } + continue + } + break + } + } + + if len(found) == 0 { + return nil + } + res := make([]string, 0, len(found)) + for k := range found { + res = append(res, k) + } + return res +} + +// pathsFromMatches 从 Match 列表中返回去重的路径切片(保留首次出现顺序) +func pathsFromMatches(ms []Match) []string { + if len(ms) == 0 { + return nil + } + seen := make(map[string]struct{}) + paths := make([]string, 0) + for _, m := range ms { + if _, ok := seen[m.Path]; ok { + continue + } + seen[m.Path] = struct{}{} + paths = append(paths, m.Path) + } + return paths +} + +// targetsFromMatches 从 Match 列表中返回去重的目标切片(保留首次出现顺序) +func targetsFromMatches(ms []Match) []string { + if len(ms) == 0 { + return nil + } + seen := make(map[string]struct{}) + targets := make([]string, 0) + for _, m := range ms { + if _, ok := seen[m.Target]; ok { + continue + } + seen[m.Target] = struct{}{} + targets = append(targets, m.Target) + } + return targets +} diff --git a/kernel/util/misc.go b/kernel/util/misc.go index ddefcb08e..63151eb15 100644 --- a/kernel/util/misc.go +++ b/kernel/util/misc.go @@ -207,6 
+207,15 @@ func ContainsSubStr(s string, subStrs []string) bool { return false } +func GetContainsSubStrs(s string, subStrs []string) (ret []string) { + for _, v := range subStrs { + if strings.Contains(s, v) { + ret = append(ret, v) + } + } + return +} + func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) { if old == new { return strs, false