mirror of
https://github.com/siyuan-note/siyuan.git
synced 2026-02-08 00:04:21 +01:00
Merge remote-tracking branch 'origin/dev' into dev
This commit is contained in:
commit
af3fdcb953
7 changed files with 356 additions and 90 deletions
|
|
@ -17,6 +17,7 @@
|
|||
package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/88250/gulu"
|
||||
|
|
@ -27,6 +28,24 @@ import (
|
|||
"github.com/siyuan-note/siyuan/kernel/util"
|
||||
)
|
||||
|
||||
func getUnusedAttributeViews(c *gin.Context) {
|
||||
ret := gulu.Ret.NewResult()
|
||||
defer c.JSON(http.StatusOK, ret)
|
||||
|
||||
unusedAttributeViews := model.UnusedAttributeViews()
|
||||
total := len(unusedAttributeViews)
|
||||
|
||||
const maxUnusedAttributeViews = 512
|
||||
if total > maxUnusedAttributeViews {
|
||||
unusedAttributeViews = unusedAttributeViews[:maxUnusedAttributeViews]
|
||||
util.PushMsg(fmt.Sprintf(model.Conf.Language(251), total, maxUnusedAttributeViews), 5000)
|
||||
}
|
||||
|
||||
ret.Data = map[string]interface{}{
|
||||
"unusedAttributeViews": unusedAttributeViews,
|
||||
}
|
||||
}
|
||||
|
||||
func getAttributeViewItemIDsByBoundIDs(c *gin.Context) {
|
||||
ret := gulu.Ret.NewResult()
|
||||
defer c.JSON(http.StatusOK, ret)
|
||||
|
|
|
|||
|
|
@ -44,6 +44,10 @@ func getDocOutline(c *gin.Context) {
|
|||
}
|
||||
|
||||
rootID := arg["id"].(string)
|
||||
if util.InvalidIDPattern(rootID, ret) {
|
||||
return
|
||||
}
|
||||
|
||||
headings, err := model.Outline(rootID, preview)
|
||||
if err != nil {
|
||||
ret.Code = 1
|
||||
|
|
|
|||
|
|
@ -489,6 +489,7 @@ func ServeAPI(ginServer *gin.Engine) {
|
|||
ginServer.Handle("POST", "/api/av/getAttributeViewAddingBlockDefaultValues", model.CheckAuth, getAttributeViewAddingBlockDefaultValues)
|
||||
ginServer.Handle("POST", "/api/av/getAttributeViewBoundBlockIDsByItemIDs", model.CheckAuth, getAttributeViewBoundBlockIDsByItemIDs)
|
||||
ginServer.Handle("POST", "/api/av/getAttributeViewItemIDsByBoundIDs", model.CheckAuth, getAttributeViewItemIDsByBoundIDs)
|
||||
ginServer.Handle("POST", "/api/av/getUnusedAttributeViews", model.CheckAuth, getUnusedAttributeViews)
|
||||
|
||||
ginServer.Handle("POST", "/api/ai/chatGPT", model.CheckAuth, model.CheckAdminRole, chatGPT)
|
||||
ginServer.Handle("POST", "/api/ai/chatGPTWithAction", model.CheckAuth, model.CheckAdminRole, chatGPTWithAction)
|
||||
|
|
|
|||
|
|
@ -38,12 +38,108 @@ import (
|
|||
"github.com/siyuan-note/siyuan/kernel/av"
|
||||
"github.com/siyuan-note/siyuan/kernel/cache"
|
||||
"github.com/siyuan-note/siyuan/kernel/filesys"
|
||||
"github.com/siyuan-note/siyuan/kernel/search"
|
||||
"github.com/siyuan-note/siyuan/kernel/sql"
|
||||
"github.com/siyuan-note/siyuan/kernel/treenode"
|
||||
"github.com/siyuan-note/siyuan/kernel/util"
|
||||
"github.com/xrash/smetrics"
|
||||
)
|
||||
|
||||
func UnusedAttributeViews() (ret []string) {
|
||||
defer logging.Recover()
|
||||
ret = []string{}
|
||||
|
||||
allAvIDs, err := getAllAvIDs()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
referencedAvIDs := map[string]bool{}
|
||||
luteEngine := util.NewLute()
|
||||
boxes := Conf.GetBoxes()
|
||||
for _, box := range boxes {
|
||||
pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32)
|
||||
for _, paths := range pages {
|
||||
var trees []*parse.Tree
|
||||
for _, localPath := range paths {
|
||||
tree, loadTreeErr := loadTree(localPath, luteEngine)
|
||||
if nil != loadTreeErr {
|
||||
continue
|
||||
}
|
||||
trees = append(trees, tree)
|
||||
}
|
||||
for _, tree := range trees {
|
||||
for _, id := range getAvIDs(tree, allAvIDs) {
|
||||
referencedAvIDs[id] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
templateAvIDs := search.FindAllMatchedTargets(filepath.Join(util.DataDir, "templates"), allAvIDs)
|
||||
for _, id := range templateAvIDs {
|
||||
referencedAvIDs[id] = true
|
||||
}
|
||||
|
||||
for _, id := range allAvIDs {
|
||||
if !referencedAvIDs[id] {
|
||||
ret = append(ret, id)
|
||||
}
|
||||
}
|
||||
|
||||
ret = gulu.Str.RemoveDuplicatedElem(ret)
|
||||
return
|
||||
}
|
||||
|
||||
func getAvIDs(tree *parse.Tree, allAvIDs []string) (ret []string) {
|
||||
ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
|
||||
if !entering {
|
||||
return ast.WalkContinue
|
||||
}
|
||||
|
||||
if ast.NodeAttributeView == n.Type {
|
||||
ret = append(ret, n.AttributeViewID)
|
||||
}
|
||||
|
||||
for _, kv := range n.KramdownIAL {
|
||||
ids := util.GetContainsSubStrs(kv[1], allAvIDs)
|
||||
if 0 < len(ids) {
|
||||
ret = append(ret, ids...)
|
||||
}
|
||||
}
|
||||
|
||||
return ast.WalkContinue
|
||||
})
|
||||
|
||||
ret = gulu.Str.RemoveDuplicatedElem(ret)
|
||||
return
|
||||
}
|
||||
|
||||
func getAllAvIDs() (ret []string, err error) {
|
||||
ret = []string{}
|
||||
|
||||
entries, err := os.ReadDir(filepath.Join(util.DataDir, "storage", "av"))
|
||||
if nil != err {
|
||||
return
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
name := entry.Name()
|
||||
if !strings.HasSuffix(name, ".json") {
|
||||
continue
|
||||
}
|
||||
|
||||
id := strings.TrimSuffix(name, ".json")
|
||||
if !ast.IsNodeIDPattern(id) {
|
||||
continue
|
||||
}
|
||||
|
||||
ret = append(ret, id)
|
||||
}
|
||||
ret = gulu.Str.RemoveDuplicatedElem(ret)
|
||||
return
|
||||
}
|
||||
|
||||
func GetAttributeViewItemIDs(avID string, blockIDs []string) (ret map[string]string) {
|
||||
ret = map[string]string{}
|
||||
for _, blockID := range blockIDs {
|
||||
|
|
|
|||
|
|
@ -17,17 +17,13 @@
|
|||
package model
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/88250/lute"
|
||||
|
|
@ -38,6 +34,7 @@ import (
|
|||
"github.com/siyuan-note/logging"
|
||||
"github.com/siyuan-note/siyuan/kernel/av"
|
||||
"github.com/siyuan-note/siyuan/kernel/filesys"
|
||||
"github.com/siyuan-note/siyuan/kernel/search"
|
||||
"github.com/siyuan-note/siyuan/kernel/sql"
|
||||
"github.com/siyuan-note/siyuan/kernel/task"
|
||||
"github.com/siyuan-note/siyuan/kernel/treenode"
|
||||
|
|
@ -317,7 +314,7 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) {
|
|||
boxes := Conf.GetBoxes()
|
||||
for _, box := range boxes {
|
||||
root := filepath.Join(util.DataDir, box.ID)
|
||||
paths := findAllOccurrences(root, id)
|
||||
paths := search.FindAllMatchedPaths(root, []string{id})
|
||||
var rootIDs []string
|
||||
rootIDPaths := map[string]string{}
|
||||
for _, p := range paths {
|
||||
|
|
@ -335,88 +332,3 @@ func findUnindexedTreePathInAllBoxes(id string) (ret string) {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
func findAllOccurrences(root string, target string) []string {
|
||||
if root == "" || target == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
searchBytes := []byte(target)
|
||||
jobs := make(chan string, 256) // 任务通道
|
||||
results := make(chan string, 256) // 结果通道
|
||||
|
||||
// 用于等待所有 Worker 完成
|
||||
var wg sync.WaitGroup
|
||||
// 用于等待结果收集器完成
|
||||
var collectWg sync.WaitGroup
|
||||
|
||||
// 1. 启动结果收集协程
|
||||
var matchedPaths []string
|
||||
collectWg.Add(1)
|
||||
go func() {
|
||||
defer collectWg.Done()
|
||||
for path := range results {
|
||||
matchedPaths = append(matchedPaths, path)
|
||||
}
|
||||
}()
|
||||
|
||||
// 2. 启动并发 Worker Pool (基于 CPU 核心数)
|
||||
numWorkers := runtime.NumCPU()
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for path := range jobs {
|
||||
if containsTarget(path, searchBytes) {
|
||||
results <- path
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// 3. 遍历文件夹并分发任务
|
||||
_ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
|
||||
if err == nil && d.Type().IsRegular() {
|
||||
jobs <- path
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// 4. 关闭通道并等待结束
|
||||
close(jobs) // 停止分发任务
|
||||
wg.Wait() // 等待所有 Worker 处理完
|
||||
close(results) // 停止收集结果
|
||||
collectWg.Wait() // 等待切片组装完成
|
||||
|
||||
return matchedPaths
|
||||
}
|
||||
|
||||
// containsTarget reports whether the file at path contains target.
//
// The file is streamed in 1 MiB chunks so large files are never loaded
// whole. The last len(target)-1 bytes of each chunk are carried over to the
// front of the next one, so an occurrence that straddles a chunk boundary is
// still found. Line length does not matter: content beyond the first 1 MiB
// of a very long line is searched like any other.
//
// It returns false when the file cannot be opened, or when a read error
// occurs before a match is found.
func containsTarget(path string, target []byte) bool {
	f, err := os.Open(path)
	if err != nil {
		return false
	}
	defer f.Close()

	const chunkSize = 1024 * 1024

	// Number of trailing bytes that could be the start of a split occurrence.
	overlap := 0
	if len(target) > 1 {
		overlap = len(target) - 1
	}

	// One buffer for the whole scan: buf[:carry] holds the bytes kept from
	// the previous chunk, and each read fills the space after them.
	buf := make([]byte, overlap+chunkSize)
	carry := 0
	for {
		n, readErr := f.Read(buf[carry:])
		if n > 0 {
			window := buf[:carry+n]
			if bytes.Contains(window, target) {
				return true
			}

			carry = overlap
			if len(window) < carry {
				carry = len(window)
			}
			// copy handles the overlapping source and destination correctly.
			copy(buf, window[len(window)-carry:])
		}
		if readErr != nil {
			// io.EOF or a real read error: nothing more can be searched.
			return false
		}
	}
}
|
||||
|
|
|
|||
225
kernel/search/find.go
Normal file
225
kernel/search/find.go
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
// SiYuan - Refactor your thinking
|
||||
// Copyright (c) 2020-present, b3log.org
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package search
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Match is a single search hit: one target string found in one file.
type Match struct {
	// Path is the path of the file in which the target was found.
	Path string
	// Target is the target string that was found in that file.
	Target string
}
|
||||
|
||||
func FindAllMatchedPaths(root string, targets []string) []string {
|
||||
matches := FindAllMatches(root, targets)
|
||||
return pathsFromMatches(matches)
|
||||
}
|
||||
|
||||
func FindAllMatchedTargets(root string, targets []string) []string {
|
||||
matches := FindAllMatches(root, targets)
|
||||
return targetsFromMatches(matches)
|
||||
}
|
||||
|
||||
// FindAllMatches 遍历 root 下的文件,返回所有命中的结果(文件路径 + 命中目标)
|
||||
// targets 为空或 root 为空时返回 nil
|
||||
func FindAllMatches(root string, targets []string) []Match {
|
||||
if root == "" || len(targets) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 构建基于首字节的模式索引,并计算最长模式长度
|
||||
patternIndex := make(map[byte][][]byte)
|
||||
var maxLen int
|
||||
for _, t := range targets {
|
||||
if t == "" {
|
||||
continue
|
||||
}
|
||||
b := []byte(t)
|
||||
if len(b) > maxLen {
|
||||
maxLen = len(b)
|
||||
}
|
||||
patternIndex[b[0]] = append(patternIndex[b[0]], b)
|
||||
}
|
||||
if len(patternIndex) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
jobs := make(chan string, 256)
|
||||
results := make(chan Match, 256)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
var collectWg sync.WaitGroup
|
||||
|
||||
var matches []Match
|
||||
collectWg.Add(1)
|
||||
go func() {
|
||||
defer collectWg.Done()
|
||||
for m := range results {
|
||||
matches = append(matches, m)
|
||||
}
|
||||
}()
|
||||
|
||||
numWorkers := runtime.NumCPU()
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for p := range jobs {
|
||||
hits := scanFileForTargets(p, patternIndex, maxLen)
|
||||
if len(hits) > 0 {
|
||||
for _, t := range hits {
|
||||
results <- Match{Path: p, Target: t}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
_ = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
|
||||
if err == nil && d.Type().IsRegular() {
|
||||
jobs <- path
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
close(jobs)
|
||||
wg.Wait()
|
||||
close(results)
|
||||
collectWg.Wait()
|
||||
return matches
|
||||
}
|
||||
|
||||
// scanFileForTargets streams the file at path and returns the distinct
// patterns that occur in it, in no particular order.
//
// patternIndex maps a first byte to every pattern starting with that byte,
// and maxLen is the length of the longest pattern. The file is read in
// 64 KiB chunks through one reusable buffer. The last maxLen-1 bytes of each
// window are moved to the front of the buffer before the next read, so a
// pattern split across two reads is still matched.
//
// It returns nil when the file cannot be opened or no pattern is found. If a
// read fails part-way, the patterns found so far are returned.
func scanFileForTargets(path string, patternIndex map[byte][][]byte, maxLen int) []string {
	f, err := os.Open(path)
	if err != nil {
		return nil
	}
	defer f.Close()

	// Byte bitmap: lets the scan skip bytes that cannot start any pattern
	// without touching the map.
	var bitmap [256]bool
	for b := range patternIndex {
		bitmap[b] = true
	}

	const chunkSize = 64 << 10 // 64 KiB

	// Number of trailing bytes that may be the start of a split pattern.
	overlap := 0
	if maxLen > 1 {
		overlap = maxLen - 1
	}

	// buf[:carry] holds the bytes kept from the previous window; every read
	// appends after them, so no per-chunk allocation is needed.
	buf := make([]byte, overlap+chunkSize)
	carry := 0
	found := make(map[string]struct{})

	for {
		n, readErr := f.Read(buf[carry:])
		if n > 0 {
			window := buf[:carry+n]
			for i := 0; i < len(window); i++ {
				first := window[i]
				if !bitmap[first] {
					continue
				}
				rest := window[i:]
				// A pattern longer than rest cannot match here; if it starts
				// in the final overlap bytes it is re-checked next round.
				for _, pat := range patternIndex[first] {
					if bytes.HasPrefix(rest, pat) {
						found[string(pat)] = struct{}{}
					}
				}
			}

			carry = overlap
			if len(window) < carry {
				carry = len(window)
			}
			// copy handles the overlapping source and destination correctly.
			copy(buf, window[len(window)-carry:])
		}
		if readErr != nil {
			// io.EOF or a read failure: stop and return what was found.
			break
		}
	}

	if len(found) == 0 {
		return nil
	}
	res := make([]string, 0, len(found))
	for target := range found {
		res = append(res, target)
	}
	return res
}
|
||||
|
||||
// pathsFromMatches 从 Match 列表中返回去重的路径切片(保留首次出现顺序)
|
||||
func pathsFromMatches(ms []Match) []string {
|
||||
if len(ms) == 0 {
|
||||
return nil
|
||||
}
|
||||
seen := make(map[string]struct{})
|
||||
paths := make([]string, 0)
|
||||
for _, m := range ms {
|
||||
if _, ok := seen[m.Path]; ok {
|
||||
continue
|
||||
}
|
||||
seen[m.Path] = struct{}{}
|
||||
paths = append(paths, m.Path)
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
// targetsFromMatches 从 Match 列表中返回去重的目标切片(保留首次出现顺序)
|
||||
func targetsFromMatches(ms []Match) []string {
|
||||
if len(ms) == 0 {
|
||||
return nil
|
||||
}
|
||||
seen := make(map[string]struct{})
|
||||
targets := make([]string, 0)
|
||||
for _, m := range ms {
|
||||
if _, ok := seen[m.Target]; ok {
|
||||
continue
|
||||
}
|
||||
seen[m.Target] = struct{}{}
|
||||
targets = append(targets, m.Target)
|
||||
}
|
||||
return targets
|
||||
}
|
||||
|
|
@ -207,6 +207,15 @@ func ContainsSubStr(s string, subStrs []string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// GetContainsSubStrs returns, in their original order, the elements of
// subStrs that occur as a substring of s. The result is nil when none do.
func GetContainsSubStrs(s string, subStrs []string) (ret []string) {
	for i := range subStrs {
		candidate := subStrs[i]
		if strings.Index(s, candidate) < 0 {
			continue
		}
		ret = append(ret, candidate)
	}
	return
}
|
||||
|
||||
func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) {
|
||||
if old == new {
|
||||
return strs, false
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue