🎨 Supports cleaning up unreferenced databases https://github.com/siyuan-note/siyuan/issues/11569

Signed-off-by: Daniel <845765@qq.com>
Daniel 2026-01-27 22:58:56 +08:00
parent a77e0820ad
commit 1fa674e430
6 changed files with 319 additions and 90 deletions


@@ -17,6 +17,7 @@
package api

import (
    "fmt"
    "net/http"

    "github.com/88250/gulu"
@@ -27,6 +28,24 @@ import (
    "github.com/siyuan-note/siyuan/kernel/util"
)

func getUnusedAttributeViews(c *gin.Context) {
    ret := gulu.Ret.NewResult()
    defer c.JSON(http.StatusOK, ret)

    unusedAttributeViews := model.UnusedAttributeViews()
    total := len(unusedAttributeViews)
    const maxUnusedAttributeViews = 512
    if total > maxUnusedAttributeViews {
        unusedAttributeViews = unusedAttributeViews[:maxUnusedAttributeViews]
        util.PushMsg(fmt.Sprintf(model.Conf.Language(251), total, maxUnusedAttributeViews), 5000)
    }

    ret.Data = map[string]interface{}{
        "unusedAttributeViews": unusedAttributeViews,
    }
}

func getAttributeViewItemIDsByBoundIDs(c *gin.Context) {
    ret := gulu.Ret.NewResult()
    defer c.JSON(http.StatusOK, ret)


@@ -489,6 +489,7 @@ func ServeAPI(ginServer *gin.Engine) {
    ginServer.Handle("POST", "/api/av/getAttributeViewAddingBlockDefaultValues", model.CheckAuth, getAttributeViewAddingBlockDefaultValues)
    ginServer.Handle("POST", "/api/av/getAttributeViewBoundBlockIDsByItemIDs", model.CheckAuth, getAttributeViewBoundBlockIDsByItemIDs)
    ginServer.Handle("POST", "/api/av/getAttributeViewItemIDsByBoundIDs", model.CheckAuth, getAttributeViewItemIDsByBoundIDs)
    ginServer.Handle("POST", "/api/av/getUnusedAttributeViews", model.CheckAuth, getUnusedAttributeViews)

    ginServer.Handle("POST", "/api/ai/chatGPT", model.CheckAuth, model.CheckAdminRole, chatGPT)
    ginServer.Handle("POST", "/api/ai/chatGPTWithAction", model.CheckAuth, model.CheckAdminRole, chatGPTWithAction)


@@ -38,12 +38,108 @@ import (
    "github.com/siyuan-note/siyuan/kernel/av"
    "github.com/siyuan-note/siyuan/kernel/cache"
    "github.com/siyuan-note/siyuan/kernel/filesys"
    "github.com/siyuan-note/siyuan/kernel/search"
    "github.com/siyuan-note/siyuan/kernel/sql"
    "github.com/siyuan-note/siyuan/kernel/treenode"
    "github.com/siyuan-note/siyuan/kernel/util"
    "github.com/xrash/smetrics"
)

func UnusedAttributeViews() (ret []string) {
    defer logging.Recover()

    ret = []string{}
    allAvIDs, err := getAllAvIDs()
    if err != nil {
        return
    }

    referencedAvIDs := map[string]bool{}
    luteEngine := util.NewLute()
    boxes := Conf.GetBoxes()
    for _, box := range boxes {
        pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32)
        for _, paths := range pages {
            var trees []*parse.Tree
            for _, localPath := range paths {
                tree, loadTreeErr := loadTree(localPath, luteEngine)
                if nil != loadTreeErr {
                    continue
                }
                trees = append(trees, tree)
            }

            for _, tree := range trees {
                for _, id := range getAvIDs(tree, allAvIDs) {
                    referencedAvIDs[id] = true
                }
            }
        }
    }

    templateAvIDs := search.FindAllMatchedTargets(filepath.Join(util.DataDir, "templates"), allAvIDs)
    for _, id := range templateAvIDs {
        referencedAvIDs[id] = true
    }

    for _, id := range allAvIDs {
        if !referencedAvIDs[id] {
            ret = append(ret, id)
        }
    }
    ret = gulu.Str.RemoveDuplicatedElem(ret)
    return
}

func getAvIDs(tree *parse.Tree, allAvIDs []string) (ret []string) {
    ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
        if !entering {
            return ast.WalkContinue
        }

        if ast.NodeAttributeView == n.Type {
            ret = append(ret, n.AttributeViewID)
        }

        for _, kv := range n.KramdownIAL {
            ids := util.GetContainsSubStrs(kv[1], allAvIDs)
            if 0 < len(ids) {
                ret = append(ret, ids...)
            }
        }
        return ast.WalkContinue
    })
    ret = gulu.Str.RemoveDuplicatedElem(ret)
    return
}

func getAllAvIDs() (ret []string, err error) {
    ret = []string{}
    entries, err := os.ReadDir(filepath.Join(util.DataDir, "storage", "av"))
    if nil != err {
        return
    }

    for _, entry := range entries {
        name := entry.Name()
        if !strings.HasSuffix(name, ".json") {
            continue
        }

        id := strings.TrimSuffix(name, ".json")
        if !ast.IsNodeIDPattern(id) {
            continue
        }
        ret = append(ret, id)
    }
    ret = gulu.Str.RemoveDuplicatedElem(ret)
    return
}

func GetAttributeViewItemIDs(avID string, blockIDs []string) (ret map[string]string) {
    ret = map[string]string{}
    for _, blockID := range blockIDs {
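
The candidates come from getAllAvIDs above, which simply lists data/storage/av and keeps the .json files whose base name is a node ID; every ID that UnusedAttributeViews reports therefore corresponds to exactly one file in that folder. A tiny sketch of that mapping, with a made-up data directory and ID:

package main

import (
    "fmt"
    "path/filepath"
)

func main() {
    // Hypothetical workspace data directory and one ID reported by UnusedAttributeViews.
    dataDir := "/path/to/workspace/data"
    unusedAvID := "20240101120000-abcdefg"

    // getAllAvIDs enumerates data/storage/av/*.json, so each reported ID maps back to one file.
    fmt.Println(filepath.Join(dataDir, "storage", "av", unusedAvID+".json"))
}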


@@ -17,17 +17,13 @@
package model

import (
    "bufio"
    "bytes"
    "errors"
    "fmt"
    "io/fs"
    "os"
    "path"
    "path/filepath"
    "runtime"
    "strings"
    "sync"
    "time"

    "github.com/88250/lute"
@@ -38,6 +34,7 @@ import (
    "github.com/siyuan-note/logging"
    "github.com/siyuan-note/siyuan/kernel/av"
    "github.com/siyuan-note/siyuan/kernel/filesys"
    "github.com/siyuan-note/siyuan/kernel/search"
    "github.com/siyuan-note/siyuan/kernel/sql"
    "github.com/siyuan-note/siyuan/kernel/task"
    "github.com/siyuan-note/siyuan/kernel/treenode"
@@ -317,7 +314,7 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) {
    boxes := Conf.GetBoxes()
    for _, box := range boxes {
        root := filepath.Join(util.DataDir, box.ID)
        paths := findAllOccurrences(root, id)
        paths := search.FindAllMatchedPaths(root, []string{id})
        var rootIDs []string
        rootIDPaths := map[string]string{}
        for _, p := range paths {
@@ -335,88 +332,3 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) {
    }
    return
}

func findAllOccurrences(root string, target string) []string {
    if root == "" || target == "" {
        return nil
    }

    searchBytes := []byte(target)
    jobs := make(chan string, 256)    // job channel
    results := make(chan string, 256) // result channel

    // waits for all workers to finish
    var wg sync.WaitGroup
    // waits for the result collector to finish
    var collectWg sync.WaitGroup

    // 1. Start the result-collecting goroutine
    var matchedPaths []string
    collectWg.Add(1)
    go func() {
        defer collectWg.Done()
        for path := range results {
            matchedPaths = append(matchedPaths, path)
        }
    }()

    // 2. Start a concurrent worker pool (sized by CPU core count)
    numWorkers := runtime.NumCPU()
    for i := 0; i < numWorkers; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for path := range jobs {
                if containsTarget(path, searchBytes) {
                    results <- path
                }
            }
        }()
    }

    // 3. Walk the directory tree and dispatch jobs
    _ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
        if err == nil && d.Type().IsRegular() {
            jobs <- path
        }
        return nil
    })

    // 4. Close the channels and wait for completion
    close(jobs)      // stop dispatching jobs
    wg.Wait()        // wait for all workers to finish processing
    close(results)   // stop collecting results
    collectWg.Wait() // wait for the result slice to be assembled
    return matchedPaths
}

// containsTarget is a byte-stream matcher optimized for large files
func containsTarget(path string, target []byte) bool {
    f, err := os.Open(path)
    if err != nil {
        return false
    }
    defer f.Close()

    // 1 MB buffer
    reader := bufio.NewReaderSize(f, 1024*1024)
    for {
        // ReadSlice reads from the buffer without an extra copy
        line, err := reader.ReadSlice('\n')
        if len(line) > 0 && bytes.Contains(line, target) {
            return true
        }
        if err != nil {
            if err == bufio.ErrBufferFull {
                // For lines longer than 1 MB, skip the rest of the current line
                for err == bufio.ErrBufferFull {
                    _, err = reader.ReadSlice('\n')
                }
                continue
            }
            break // EOF or other error
        }
    }
    return false
}

kernel/search/find.go (new file, 192 lines)

@@ -0,0 +1,192 @@
// SiYuan - Refactor your thinking
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package search

import (
    "bufio"
    "bytes"
    "os"
    "path/filepath"
    "runtime"
    "sync"
)

type Match struct {
    Path   string
    Target string
}

func FindAllMatchedPaths(root string, targets []string) []string {
    matches := FindAllMatches(root, targets)
    return pathsFromMatches(matches)
}

func FindAllMatchedTargets(root string, targets []string) []string {
    matches := FindAllMatches(root, targets)
    return targetsFromMatches(matches)
}

// FindAllMatches walks the files under root and returns every hit (file path + matched target).
// It returns nil when root is empty or targets is empty.
func FindAllMatches(root string, targets []string) []Match {
    if root == "" || len(targets) == 0 {
        return nil
    }

    var searchBytes [][]byte
    for _, t := range targets {
        if t != "" {
            searchBytes = append(searchBytes, []byte(t))
        }
    }
    if len(searchBytes) == 0 {
        return nil
    }

    jobs := make(chan string, 256)
    results := make(chan Match, 256)

    var wg sync.WaitGroup
    var collectWg sync.WaitGroup

    var matches []Match
    collectWg.Add(1)
    go func() {
        defer collectWg.Done()
        for m := range results {
            matches = append(matches, m)
        }
    }()

    numWorkers := runtime.NumCPU()
    for i := 0; i < numWorkers; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for p := range jobs {
                hits := scanFileForTargets(p, searchBytes)
                if len(hits) > 0 {
                    for _, t := range hits {
                        results <- Match{Path: p, Target: t}
                    }
                }
            }
        }()
    }

    _ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
        if err == nil && d.Type().IsRegular() {
            jobs <- path
        }
        return nil
    })

    close(jobs)
    wg.Wait()
    close(results)
    collectWg.Wait()
    return matches
}

// scanFileForTargets searches a single file for all targets and returns the deduplicated list of matched target strings.
func scanFileForTargets(path string, targets [][]byte) []string {
    f, err := os.Open(path)
    if err != nil {
        return nil
    }
    defer f.Close()

    reader := bufio.NewReaderSize(f, 1024*1024) // 1 MB buffer
    found := make(map[string]struct{})
    remaining := len(targets)
    for {
        line, err := reader.ReadSlice('\n')
        if len(line) > 0 {
            for _, t := range targets {
                ts := string(t)
                if _, ok := found[ts]; ok {
                    continue
                }
                if bytes.Contains(line, t) {
                    found[ts] = struct{}{}
                    remaining--
                    if remaining == 0 {
                        // all targets found, return early
                        res := make([]string, 0, len(found))
                        for k := range found {
                            res = append(res, k)
                        }
                        return res
                    }
                }
            }
        }
        if err != nil {
            if err == bufio.ErrBufferFull {
                // skip the rest of a line longer than the buffer
                for err == bufio.ErrBufferFull {
                    _, err = reader.ReadSlice('\n')
                }
                continue
            }
            break
        }
    }

    if len(found) == 0 {
        return nil
    }
    res := make([]string, 0, len(found))
    for k := range found {
        res = append(res, k)
    }
    return res
}

// pathsFromMatches returns a deduplicated slice of paths from a list of Matches (first-seen order preserved).
func pathsFromMatches(ms []Match) []string {
    if len(ms) == 0 {
        return nil
    }

    seen := make(map[string]struct{})
    paths := make([]string, 0)
    for _, m := range ms {
        if _, ok := seen[m.Path]; ok {
            continue
        }
        seen[m.Path] = struct{}{}
        paths = append(paths, m.Path)
    }
    return paths
}

// targetsFromMatches returns a deduplicated slice of targets from a list of Matches (first-seen order preserved).
func targetsFromMatches(ms []Match) []string {
    if len(ms) == 0 {
        return nil
    }

    seen := make(map[string]struct{})
    targets := make([]string, 0)
    for _, m := range ms {
        if _, ok := seen[m.Target]; ok {
            continue
        }
        seen[m.Target] = struct{}{}
        targets = append(targets, m.Target)
    }
    return targets
}
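
For illustration, a minimal standalone use of the new package; the directory and ID values are made up and error handling is omitted for brevity:

package main

import (
    "fmt"

    "github.com/siyuan-note/siyuan/kernel/search"
)

func main() {
    // Hypothetical workspace subfolder and candidate IDs.
    root := "/path/to/workspace/data/templates"
    targets := []string{"20240101120000-abcdefg", "20240202130000-hijklmn"}

    // Which candidate IDs appear anywhere under root (deduplicated)?
    fmt.Println(search.FindAllMatchedTargets(root, targets))

    // Which files contain at least one candidate (deduplicated)?
    fmt.Println(search.FindAllMatchedPaths(root, targets))

    // Full detail: every (file, target) hit.
    for _, m := range search.FindAllMatches(root, targets) {
        fmt.Println(m.Path, "contains", m.Target)
    }
}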


@@ -207,6 +207,15 @@ func ContainsSubStr(s string, subStrs []string) bool {
    return false
}

func GetContainsSubStrs(s string, subStrs []string) (ret []string) {
    for _, v := range subStrs {
        if strings.Contains(s, v) {
            ret = append(ret, v)
        }
    }
    return
}

func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) {
    if old == new {
        return strs, false
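
The new GetContainsSubStrs is what getAvIDs uses to spot database IDs embedded in IAL values; unlike the boolean ContainsSubStr it reports which candidates matched, and it returns nil when none do, which the 0 < len(ids) check in getAvIDs handles. A standalone sketch with a local copy of the helper and made-up IDs and attribute value:

package main

import (
    "fmt"
    "strings"
)

// Local copy of util.GetContainsSubStrs for a standalone demo.
func getContainsSubStrs(s string, subStrs []string) (ret []string) {
    for _, v := range subStrs {
        if strings.Contains(s, v) {
            ret = append(ret, v)
        }
    }
    return
}

func main() {
    // Made-up IAL value and candidate database IDs.
    ialValue := "20240101120000-abcdefg,20240202130000-hijklmn"
    allAvIDs := []string{"20240101120000-abcdefg", "20240202130000-hijklmn", "20240303140000-opqrstu"}

    fmt.Println(getContainsSubStrs(ialValue, allAvIDs))      // [20240101120000-abcdefg 20240202130000-hijklmn]
    fmt.Println(getContainsSubStrs("no ids here", allAvIDs)) // [] (nil slice when nothing matches)
}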