2023-06-24 20:39:55 +08:00
// SiYuan - Refactor your thinking
2022-05-26 15:18:53 +08:00
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package model
import (
"bytes"
2022-11-28 16:30:08 +08:00
"errors"
2022-11-29 23:21:19 +08:00
"fmt"
2023-06-29 18:14:36 +08:00
"math"
2023-04-03 11:25:56 +08:00
"os"
2022-05-26 15:18:53 +08:00
"path"
2023-04-03 11:25:56 +08:00
"path/filepath"
2022-11-28 11:54:04 +08:00
"regexp"
2022-11-30 12:11:49 +08:00
"sort"
2022-05-26 15:18:53 +08:00
"strconv"
"strings"
2024-01-10 09:48:14 +08:00
"sync"
2022-05-26 15:18:53 +08:00
"time"
"unicode/utf8"
"github.com/88250/gulu"
2022-10-26 09:51:09 +08:00
"github.com/88250/lute"
2022-05-26 15:18:53 +08:00
"github.com/88250/lute/ast"
2024-05-17 11:20:24 +08:00
"github.com/88250/lute/html"
2022-10-26 09:51:09 +08:00
"github.com/88250/lute/lex"
2022-05-26 15:18:53 +08:00
"github.com/88250/lute/parse"
2023-04-21 10:46:55 +08:00
"github.com/88250/vitess-sqlparser/sqlparser"
2022-05-26 15:18:53 +08:00
"github.com/jinzhu/copier"
2023-04-03 11:25:56 +08:00
"github.com/siyuan-note/filelock"
2022-07-17 12:22:32 +08:00
"github.com/siyuan-note/logging"
2022-05-26 15:18:53 +08:00
"github.com/siyuan-note/siyuan/kernel/conf"
"github.com/siyuan-note/siyuan/kernel/search"
"github.com/siyuan-note/siyuan/kernel/sql"
2023-04-03 11:25:56 +08:00
"github.com/siyuan-note/siyuan/kernel/task"
2022-05-26 15:18:53 +08:00
"github.com/siyuan-note/siyuan/kernel/treenode"
"github.com/siyuan-note/siyuan/kernel/util"
"github.com/xrash/smetrics"
)
2024-02-26 11:16:36 +08:00
func ListInvalidBlockRefs ( page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount , pageCount int ) {
refBlockMap := map [ string ] [ ] string { }
blockMap := map [ string ] bool { }
var invalidBlockIDs [ ] string
notebooks , err := ListNotebooks ( )
2024-09-04 04:40:50 +03:00
if err != nil {
2024-02-26 11:16:36 +08:00
return
}
luteEngine := util . NewLute ( )
for _ , notebook := range notebooks {
pages := pagedPaths ( filepath . Join ( util . DataDir , notebook . ID ) , 32 )
for _ , paths := range pages {
var trees [ ] * parse . Tree
for _ , localPath := range paths {
tree , loadTreeErr := loadTree ( localPath , luteEngine )
if nil != loadTreeErr {
continue
}
trees = append ( trees , tree )
}
for _ , tree := range trees {
ast . Walk ( tree . Root , func ( n * ast . Node , entering bool ) ast . WalkStatus {
if entering {
if n . IsBlock ( ) {
blockMap [ n . ID ] = true
return ast . WalkContinue
}
if ast . NodeTextMark == n . Type {
if n . IsTextMarkType ( "a" ) {
if strings . HasPrefix ( n . TextMarkAHref , "siyuan://blocks/" ) {
defID := strings . TrimPrefix ( n . TextMarkAHref , "siyuan://blocks/" )
if strings . Contains ( defID , "?" ) {
defID = strings . Split ( defID , "?" ) [ 0 ]
}
refID := treenode . ParentBlock ( n ) . ID
if defIDs := refBlockMap [ refID ] ; 1 > len ( defIDs ) {
refBlockMap [ refID ] = [ ] string { defID }
} else {
refBlockMap [ refID ] = append ( defIDs , defID )
}
}
} else if n . IsTextMarkType ( "block-ref" ) {
defID := n . TextMarkBlockRefID
refID := treenode . ParentBlock ( n ) . ID
if defIDs := refBlockMap [ refID ] ; 1 > len ( defIDs ) {
refBlockMap [ refID ] = [ ] string { defID }
} else {
refBlockMap [ refID ] = append ( defIDs , defID )
}
}
}
}
return ast . WalkContinue
} )
}
}
}
invalidDefIDs := map [ string ] bool { }
for _ , refDefIDs := range refBlockMap {
for _ , defID := range refDefIDs {
invalidDefIDs [ defID ] = true
}
}
var toRemoves [ ] string
for defID , _ := range invalidDefIDs {
if _ , ok := blockMap [ defID ] ; ok {
toRemoves = append ( toRemoves , defID )
}
}
for _ , toRemove := range toRemoves {
delete ( invalidDefIDs , toRemove )
}
toRemoves = nil
for refID , defIDs := range refBlockMap {
var tmp [ ] string
for _ , defID := range defIDs {
if _ , ok := invalidDefIDs [ defID ] ; ! ok {
tmp = append ( tmp , defID )
}
}
for _ , toRemove := range tmp {
defIDs = gulu . Str . RemoveElem ( defIDs , toRemove )
}
if 1 > len ( defIDs ) {
toRemoves = append ( toRemoves , refID )
}
}
for _ , toRemove := range toRemoves {
delete ( refBlockMap , toRemove )
}
for refID , _ := range refBlockMap {
invalidBlockIDs = append ( invalidBlockIDs , refID )
}
invalidBlockIDs = gulu . Str . RemoveDuplicatedElem ( invalidBlockIDs )
sort . Strings ( invalidBlockIDs )
2024-02-28 22:37:11 +08:00
allInvalidBlockIDs := invalidBlockIDs
2024-02-26 11:16:36 +08:00
start := ( page - 1 ) * pageSize
end := page * pageSize
if end > len ( invalidBlockIDs ) {
end = len ( invalidBlockIDs )
}
invalidBlockIDs = invalidBlockIDs [ start : end ]
sqlBlocks := sql . GetBlocks ( invalidBlockIDs )
2024-02-28 22:37:11 +08:00
var tmp [ ] * sql . Block
for _ , sqlBlock := range sqlBlocks {
if nil != sqlBlock {
tmp = append ( tmp , sqlBlock )
}
}
sqlBlocks = tmp
2024-02-26 11:16:36 +08:00
ret = fromSQLBlocks ( & sqlBlocks , "" , 36 )
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
2024-02-28 22:37:11 +08:00
matchedBlockCount = len ( allInvalidBlockIDs )
2024-02-26 11:16:36 +08:00
rootCount := map [ string ] bool { }
2024-02-28 22:37:11 +08:00
for _ , id := range allInvalidBlockIDs {
bt := treenode . GetBlockTree ( id )
if nil == bt {
2024-02-26 11:16:36 +08:00
continue
}
2024-02-28 22:37:11 +08:00
rootCount [ bt . RootID ] = true
2024-02-26 11:16:36 +08:00
}
matchedRootCount = len ( rootCount )
pageCount = ( matchedBlockCount + pageSize - 1 ) / pageSize
return
}
2022-10-12 10:11:08 +08:00
// EmbedBlock is one result entry produced for a query embed block: the
// embedded block together with the block paths returned alongside it.
type EmbedBlock struct {
2022-10-12 15:18:22 +08:00
// Block is the embedded block; it is serialized under the JSON key "block".
Block * Block ` json:"block" `
// BlockPaths is serialized under the JSON key "blockPaths".
// NOTE(review): presumably the breadcrumb of Block - confirm against getEmbeddedBlock.
BlockPaths [ ] * BlockPath ` json:"blockPaths" `
2022-10-12 10:11:08 +08:00
}
2023-11-24 17:37:34 +08:00
// UpdateEmbedBlock stores content as the content of the query embed block
// identified by id.
//
// It returns ErrBlockNotFound when no block tree entry exists for id, and a
// "not query embed block" error when the block is of any other type.
func UpdateEmbedBlock(id, content string) (err error) {
	bt := treenode.GetBlockTree(id)
	switch {
	case nil == bt:
		return ErrBlockNotFound
	case treenode.TypeAbbr(ast.NodeBlockQueryEmbed.String()) != bt.Type:
		return errors.New("not query embed block")
	}

	// The content is handed over wrapped in a single synthetic embed result.
	updateEmbedBlockContent(id, []*EmbedBlock{{Block: &Block{Markdown: content}}})
	return nil
}
2023-11-18 12:03:22 +08:00
// GetEmbedBlock is the exported entry point of getEmbedBlock; all arguments
// are passed through unchanged.
func GetEmbedBlock(embedBlockID string, includeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
	ret = getEmbedBlock(embedBlockID, includeIDs, headingMode, breadcrumb)
	return
}
func getEmbedBlock ( embedBlockID string , includeIDs [ ] string , headingMode int , breadcrumb bool ) ( ret [ ] * EmbedBlock ) {
stmt := "SELECT * FROM `blocks` WHERE `id` IN ('" + strings . Join ( includeIDs , "','" ) + "')"
sqlBlocks := sql . SelectBlocksRawStmtNoParse ( stmt , 1024 )
2023-12-26 21:51:17 +08:00
// 根据 includeIDs 的顺序排序 Improve `//!js` query embed block result sorting https://github.com/siyuan-note/siyuan/issues/9977
m := map [ string ] int { }
for i , id := range includeIDs {
m [ id ] = i
}
sort . Slice ( sqlBlocks , func ( i , j int ) bool {
return m [ sqlBlocks [ i ] . ID ] < m [ sqlBlocks [ j ] . ID ]
} )
2023-11-18 12:03:22 +08:00
ret = buildEmbedBlock ( embedBlockID , [ ] string { } , headingMode , breadcrumb , sqlBlocks )
return
}
2022-10-18 20:41:20 +08:00
func SearchEmbedBlock ( embedBlockID , stmt string , excludeIDs [ ] string , headingMode int , breadcrumb bool ) ( ret [ ] * EmbedBlock ) {
return searchEmbedBlock ( embedBlockID , stmt , excludeIDs , headingMode , breadcrumb )
2022-05-26 15:18:53 +08:00
}
2022-10-18 20:41:20 +08:00
func searchEmbedBlock ( embedBlockID , stmt string , excludeIDs [ ] string , headingMode int , breadcrumb bool ) ( ret [ ] * EmbedBlock ) {
2022-05-26 15:18:53 +08:00
sqlBlocks := sql . SelectBlocksRawStmtNoParse ( stmt , Conf . Search . Limit )
2023-11-18 12:03:22 +08:00
ret = buildEmbedBlock ( embedBlockID , excludeIDs , headingMode , breadcrumb , sqlBlocks )
return
}
func buildEmbedBlock ( embedBlockID string , excludeIDs [ ] string , headingMode int , breadcrumb bool , sqlBlocks [ ] * sql . Block ) ( ret [ ] * EmbedBlock ) {
2022-05-26 15:18:53 +08:00
var tmp [ ] * sql . Block
for _ , b := range sqlBlocks {
2023-01-19 01:16:13 +08:00
if "query_embed" == b . Type { // 嵌入块不再嵌入
// 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
// 这里会导致上面的 limit 限制不准确,导致结果变少,暂时没有解决方案,只能靠用户自己调整 SQL, 加上 type != 'query_embed' 的条件
continue
}
2022-05-26 15:18:53 +08:00
if ! gulu . Str . Contains ( b . ID , excludeIDs ) {
tmp = append ( tmp , b )
}
}
sqlBlocks = tmp
2022-10-12 10:11:08 +08:00
// 缓存最多 128 棵语法树
trees := map [ string ] * parse . Tree { }
count := 0
for _ , sb := range sqlBlocks {
if nil == trees [ sb . RootID ] {
2024-03-10 23:27:13 +08:00
tree , _ := LoadTreeByBlockID ( sb . RootID )
2022-10-12 10:11:08 +08:00
if nil == tree {
continue
}
trees [ sb . RootID ] = tree
count ++
}
if 127 < count {
break
}
}
2022-05-26 15:18:53 +08:00
for _ , sb := range sqlBlocks {
2023-11-18 12:03:22 +08:00
block , blockPaths := getEmbeddedBlock ( trees , sb , headingMode , breadcrumb )
2022-05-26 15:18:53 +08:00
if nil == block {
continue
}
2022-10-12 10:11:08 +08:00
ret = append ( ret , & EmbedBlock {
2022-10-12 15:18:22 +08:00
Block : block ,
BlockPaths : blockPaths ,
2022-10-12 10:11:08 +08:00
} )
2022-05-26 15:18:53 +08:00
}
2023-01-19 20:51:32 +08:00
// 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
2023-02-11 09:56:39 +08:00
task . AppendTaskWithTimeout ( task . DatabaseIndexEmbedBlock , 30 * time . Second , updateEmbedBlockContent , embedBlockID , ret )
2023-01-19 20:51:32 +08:00
2022-10-12 11:44:04 +08:00
// 添加笔记本名称
var boxIDs [ ] string
for _ , embedBlock := range ret {
boxIDs = append ( boxIDs , embedBlock . Block . Box )
}
boxIDs = gulu . Str . RemoveDuplicatedElem ( boxIDs )
boxNames := Conf . BoxNames ( boxIDs )
for _ , embedBlock := range ret {
name := boxNames [ embedBlock . Block . Box ]
embedBlock . Block . HPath = name + embedBlock . Block . HPath
}
2022-05-26 15:18:53 +08:00
if 1 > len ( ret ) {
2022-10-12 10:11:08 +08:00
ret = [ ] * EmbedBlock { }
2022-05-26 15:18:53 +08:00
}
return
}
2024-06-15 21:47:49 +08:00
func SearchRefBlock ( id , rootID , keyword string , beforeLen int , isSquareBrackets , isDatabase bool ) ( ret [ ] * Block , newDoc bool ) {
2023-02-05 18:32:22 +08:00
cachedTrees := map [ string ] * parse . Tree { }
2023-04-22 09:27:40 +08:00
onlyDoc := false
if isSquareBrackets {
onlyDoc = Conf . Editor . OnlySearchForDoc
}
2022-05-26 15:18:53 +08:00
if "" == keyword {
// 查询为空时默认的块引排序规则按最近使用优先 https://github.com/siyuan-note/siyuan/issues/3218
2024-06-27 10:51:04 +08:00
2024-07-24 12:07:40 +08:00
typeFilter := Conf . Search . TypeFilter ( )
2024-06-27 10:51:04 +08:00
ignoreLines := getRefSearchIgnoreLines ( )
2024-07-24 12:07:40 +08:00
refs := sql . QueryRefsRecent ( onlyDoc , typeFilter , ignoreLines )
2024-07-14 11:48:56 +08:00
var btsID [ ] string
for _ , ref := range refs {
btsID = append ( btsID , ref . DefBlockRootID )
}
2024-07-14 11:55:16 +08:00
btsID = gulu . Str . RemoveDuplicatedElem ( btsID )
2024-07-14 11:48:56 +08:00
bts := treenode . GetBlockTrees ( btsID )
2022-05-26 15:18:53 +08:00
for _ , ref := range refs {
2023-02-06 08:54:08 +08:00
tree := cachedTrees [ ref . DefBlockRootID ]
2023-02-05 18:32:22 +08:00
if nil == tree {
2024-07-14 11:48:56 +08:00
tree , _ = loadTreeByBlockTree ( bts [ ref . DefBlockRootID ] )
2023-02-05 18:32:22 +08:00
}
if nil == tree {
2022-05-26 15:18:53 +08:00
continue
}
2023-02-05 18:32:22 +08:00
cachedTrees [ ref . RootID ] = tree
node := treenode . GetNodeInTree ( tree , ref . DefBlockID )
if nil == node {
continue
}
sqlBlock := sql . BuildBlockFromNode ( node , tree )
if nil == sqlBlock {
return
}
block := fromSQLBlock ( sqlBlock , "" , 0 )
block . RefText = getNodeRefText ( node )
2022-11-13 21:07:45 +08:00
block . RefText = maxContent ( block . RefText , Conf . Editor . BlockRefDynamicAnchorTextMaxLen )
2022-05-26 15:18:53 +08:00
ret = append ( ret , block )
}
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
2023-10-09 17:30:19 +08:00
// 在 hPath 中加入笔记本名 Show notebooks in hpath of block ref search list results https://github.com/siyuan-note/siyuan/issues/9378
prependNotebookNameInHPath ( ret )
2022-05-26 15:18:53 +08:00
return
}
2023-04-22 09:27:40 +08:00
ret = fullTextSearchRefBlock ( keyword , beforeLen , onlyDoc )
2022-05-26 15:18:53 +08:00
tmp := ret [ : 0 ]
2024-07-14 11:44:13 +08:00
var btsID [ ] string
for _ , b := range ret {
btsID = append ( btsID , b . RootID )
}
2024-07-14 11:55:16 +08:00
btsID = gulu . Str . RemoveDuplicatedElem ( btsID )
2024-07-14 11:44:13 +08:00
bts := treenode . GetBlockTrees ( btsID )
2022-05-26 15:18:53 +08:00
for _ , b := range ret {
2023-02-05 18:32:22 +08:00
tree := cachedTrees [ b . RootID ]
if nil == tree {
2024-07-14 11:44:13 +08:00
tree , _ = loadTreeByBlockTree ( bts [ b . RootID ] )
2023-02-05 18:32:22 +08:00
}
if nil == tree {
continue
}
cachedTrees [ b . RootID ] = tree
2023-02-05 18:38:25 +08:00
b . RefText = getBlockRefText ( b . ID , tree )
2023-02-05 18:32:22 +08:00
2022-05-26 15:18:53 +08:00
hitFirstChildID := false
2024-04-07 09:43:29 +08:00
if b . IsContainerBlock ( ) && "NodeDocument" != b . Type {
2022-05-26 15:18:53 +08:00
// `((` 引用候选中排除当前块的父块 https://github.com/siyuan-note/siyuan/issues/4538
2023-02-05 18:32:22 +08:00
tree := cachedTrees [ b . RootID ]
2022-05-26 15:18:53 +08:00
if nil == tree {
2024-07-14 11:44:13 +08:00
tree , _ = loadTreeByBlockTree ( bts [ b . RootID ] )
2023-02-05 18:32:22 +08:00
cachedTrees [ b . RootID ] = tree
2022-05-26 15:18:53 +08:00
}
if nil != tree {
bNode := treenode . GetNodeInTree ( tree , b . ID )
if fc := treenode . FirstLeafBlock ( bNode ) ; nil != fc && fc . ID == id {
hitFirstChildID = true
}
}
}
2024-04-09 17:51:39 +08:00
if "NodeAttributeView" == b . Type {
// 数据库块可以添加到自身数据库块中,当前文档也可以添加到自身数据库块中
2022-05-26 15:18:53 +08:00
tmp = append ( tmp , b )
2024-04-09 17:51:39 +08:00
} else {
// 排除自身块、父块和根块
if b . ID != id && ! hitFirstChildID && b . ID != rootID {
tmp = append ( tmp , b )
}
2022-05-26 15:18:53 +08:00
}
2024-04-09 17:51:39 +08:00
2022-05-26 15:18:53 +08:00
}
ret = tmp
2024-06-15 21:47:49 +08:00
if ! isDatabase {
// 如果非数据库中搜索块引,则不允许新建重名文档
if block := treenode . GetBlockTree ( id ) ; nil != block {
p := path . Join ( block . HPath , keyword )
newDoc = nil == treenode . GetBlockTreeRootByHPath ( block . BoxID , p )
}
2024-06-20 20:02:00 +08:00
} else { // 如果是数据库中搜索绑定块,则允许新建重名文档 https://github.com/siyuan-note/siyuan/issues/11713
newDoc = true
2022-05-26 15:18:53 +08:00
}
2023-10-09 17:30:19 +08:00
// 在 hPath 中加入笔记本名 Show notebooks in hpath of block ref search list results https://github.com/siyuan-note/siyuan/issues/9378
prependNotebookNameInHPath ( ret )
2022-05-26 15:18:53 +08:00
return
}
2023-10-09 17:30:19 +08:00
func prependNotebookNameInHPath ( blocks [ ] * Block ) {
var boxIDs [ ] string
for _ , b := range blocks {
boxIDs = append ( boxIDs , b . Box )
}
boxIDs = gulu . Str . RemoveDuplicatedElem ( boxIDs )
boxNames := Conf . BoxNames ( boxIDs )
for _ , b := range blocks {
name := boxNames [ b . Box ]
2023-10-11 09:04:39 +08:00
b . HPath = util . EscapeHTML ( name ) + b . HPath
2023-10-09 17:30:19 +08:00
}
}
2023-12-16 23:25:14 +08:00
func FindReplace ( keyword , replacement string , replaceTypes map [ string ] bool , ids [ ] string , paths , boxes [ ] string , types map [ string ] bool , method , orderBy , groupBy int ) ( err error ) {
2022-11-28 11:54:04 +08:00
// method: 0: 文本, 1: 查询语法, 2: SQL, 3: 正则表达式
if 1 == method || 2 == method {
2022-11-28 16:30:08 +08:00
err = errors . New ( Conf . Language ( 132 ) )
2022-11-28 11:54:04 +08:00
return
}
2024-01-13 09:21:55 +08:00
if 0 != groupBy {
// 按文档分组后不支持替换 Need to be reminded that replacement operations are not supported after grouping by doc https://github.com/siyuan-note/siyuan/issues/10161
2024-03-31 23:01:00 +08:00
// 因为分组条件传入以后搜索只能命中文档块,会导致 全部替换 失效
2024-01-13 09:21:55 +08:00
err = errors . New ( Conf . Language ( 221 ) )
return
}
2023-09-20 10:45:09 +08:00
// No longer trim spaces for the keyword and replacement https://github.com/siyuan-note/siyuan/issues/9229
2022-05-26 15:18:53 +08:00
if keyword == replacement {
return
}
2023-03-01 09:17:05 +08:00
r , _ := regexp . Compile ( keyword )
escapedKey := util . EscapeHTML ( keyword )
escapedR , _ := regexp . Compile ( escapedKey )
2022-06-23 19:35:59 +08:00
ids = gulu . Str . RemoveDuplicatedElem ( ids )
2022-06-13 19:59:14 +08:00
var renameRoots [ ] * ast . Node
renameRootTitles := map [ string ] string { }
2023-04-03 11:25:56 +08:00
cachedTrees := map [ string ] * parse . Tree { }
2023-04-03 11:38:18 +08:00
historyDir , err := getHistoryDir ( HistoryOpReplace , time . Now ( ) )
2024-09-04 04:40:50 +03:00
if err != nil {
2023-04-03 11:38:18 +08:00
logging . LogErrorf ( "get history dir failed: %s" , err )
return
}
2023-06-29 18:06:04 +08:00
if 1 > len ( ids ) {
2023-06-29 18:06:18 +08:00
// `Replace All` is no longer affected by pagination https://github.com/siyuan-note/siyuan/issues/8265
2023-06-29 18:14:36 +08:00
blocks , _ , _ , _ := FullTextSearchBlock ( keyword , boxes , paths , types , method , orderBy , groupBy , 1 , math . MaxInt )
2023-06-29 18:06:04 +08:00
for _ , block := range blocks {
ids = append ( ids , block . ID )
}
}
2022-05-26 15:18:53 +08:00
for _ , id := range ids {
2023-04-03 11:25:56 +08:00
bt := treenode . GetBlockTree ( id )
if nil == bt {
continue
}
tree := cachedTrees [ bt . RootID ]
if nil != tree {
continue
}
2024-03-10 23:27:13 +08:00
tree , _ = LoadTreeByBlockID ( id )
2023-04-03 11:25:56 +08:00
if nil == tree {
continue
}
2023-04-03 11:38:18 +08:00
historyPath := filepath . Join ( historyDir , tree . Box , tree . Path )
2024-09-04 04:40:50 +03:00
if err = os . MkdirAll ( filepath . Dir ( historyPath ) , 0755 ) ; err != nil {
2023-04-03 11:38:18 +08:00
logging . LogErrorf ( "generate history failed: %s" , err )
return
}
var data [ ] byte
if data , err = filelock . ReadFile ( filepath . Join ( util . DataDir , tree . Box , tree . Path ) ) ; err != nil {
logging . LogErrorf ( "generate history failed: %s" , err )
return
}
if err = gulu . File . WriteFileSafer ( historyPath , data , 0644 ) ; err != nil {
logging . LogErrorf ( "generate history failed: %s" , err )
return
}
2023-04-03 11:25:56 +08:00
cachedTrees [ bt . RootID ] = tree
}
2023-04-03 11:38:18 +08:00
indexHistoryDir ( filepath . Base ( historyDir ) , util . NewLute ( ) )
2023-04-03 11:25:56 +08:00
2024-09-08 23:51:26 +08:00
luteEngine := util . NewLute ( )
var reloadTreeIDs [ ] string
2023-06-29 18:14:36 +08:00
for i , id := range ids {
2023-04-03 11:25:56 +08:00
bt := treenode . GetBlockTree ( id )
if nil == bt {
continue
}
tree := cachedTrees [ bt . RootID ]
if nil == tree {
continue
2022-05-26 15:18:53 +08:00
}
node := treenode . GetNodeInTree ( tree , id )
if nil == node {
2023-04-03 11:25:56 +08:00
continue
2022-05-26 15:18:53 +08:00
}
2024-09-08 23:51:26 +08:00
reloadTreeIDs = append ( reloadTreeIDs , tree . ID )
2023-06-29 21:17:37 +08:00
if ast . NodeDocument == node . Type {
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "docTitle" ] {
2023-12-16 23:25:14 +08:00
continue
}
2023-06-29 21:17:37 +08:00
title := node . IALAttr ( "title" )
if 0 == method {
if strings . Contains ( title , keyword ) {
2024-04-20 09:34:18 +08:00
docTitleReplacement := strings . ReplaceAll ( replacement , "/" , "" )
renameRootTitles [ node . ID ] = strings . ReplaceAll ( title , keyword , docTitleReplacement )
2023-06-29 21:17:37 +08:00
renameRoots = append ( renameRoots , node )
2022-05-26 15:18:53 +08:00
}
2023-06-29 21:17:37 +08:00
} else if 3 == method {
if nil != r && r . MatchString ( title ) {
2024-04-20 09:34:18 +08:00
docTitleReplacement := strings . ReplaceAll ( replacement , "/" , "" )
renameRootTitles [ node . ID ] = r . ReplaceAllString ( title , docTitleReplacement )
2023-06-29 21:17:37 +08:00
renameRoots = append ( renameRoots , node )
}
}
} else {
2024-04-20 10:02:44 +08:00
var unlinks [ ] * ast . Node
2023-06-29 21:17:37 +08:00
ast . Walk ( node , func ( n * ast . Node , entering bool ) ast . WalkStatus {
if ! entering {
return ast . WalkContinue
2022-05-26 15:18:53 +08:00
}
2023-06-29 21:17:37 +08:00
switch n . Type {
2023-12-16 23:25:14 +08:00
case ast . NodeText :
if ! replaceTypes [ "text" ] {
return ast . WalkContinue
}
2024-04-20 10:02:44 +08:00
if replaceTextNode ( n , method , keyword , replacement , r , luteEngine ) {
unlinks = append ( unlinks , n )
}
2023-12-16 23:25:14 +08:00
case ast . NodeLinkDest :
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "imgSrc" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
replaceNodeTokens ( n , method , keyword , replacement , r )
case ast . NodeLinkText :
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "imgText" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
replaceNodeTokens ( n , method , keyword , replacement , r )
case ast . NodeLinkTitle :
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "imgTitle" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
replaceNodeTokens ( n , method , keyword , replacement , r )
case ast . NodeCodeBlockCode :
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "codeBlock" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
replaceNodeTokens ( n , method , keyword , replacement , r )
case ast . NodeMathBlockContent :
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "mathBlock" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
replaceNodeTokens ( n , method , keyword , replacement , r )
case ast . NodeHTMLBlock :
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "htmlBlock" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
2022-11-04 15:44:39 +08:00
}
2023-12-16 23:25:14 +08:00
replaceNodeTokens ( n , method , keyword , replacement , r )
2023-06-29 21:17:37 +08:00
case ast . NodeTextMark :
if n . IsTextMarkType ( "code" ) {
2023-12-16 23:25:14 +08:00
if ! replaceTypes [ "code" ] {
return ast . WalkContinue
}
2023-06-29 21:17:37 +08:00
if 0 == method {
if strings . Contains ( n . TextMarkTextContent , escapedKey ) {
n . TextMarkTextContent = strings . ReplaceAll ( n . TextMarkTextContent , escapedKey , replacement )
}
} else if 3 == method {
if nil != escapedR && escapedR . MatchString ( n . TextMarkTextContent ) {
n . TextMarkTextContent = escapedR . ReplaceAllString ( n . TextMarkTextContent , replacement )
}
2022-11-28 11:54:04 +08:00
}
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "a" ) {
2023-12-17 22:15:02 +08:00
if replaceTypes [ "aText" ] {
2023-12-16 23:25:14 +08:00
if 0 == method {
2024-03-22 19:28:59 +08:00
if strings . Contains ( n . TextMarkTextContent , keyword ) {
2023-12-16 23:25:14 +08:00
n . TextMarkTextContent = strings . ReplaceAll ( n . TextMarkTextContent , keyword , replacement )
}
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkTextContent ) {
n . TextMarkTextContent = r . ReplaceAllString ( n . TextMarkTextContent , replacement )
}
2023-06-29 21:17:37 +08:00
}
2023-12-16 23:25:14 +08:00
}
2023-12-17 22:15:02 +08:00
if replaceTypes [ "aTitle" ] {
2023-12-16 23:25:14 +08:00
if 0 == method {
if strings . Contains ( n . TextMarkATitle , keyword ) {
n . TextMarkATitle = strings . ReplaceAll ( n . TextMarkATitle , keyword , replacement )
}
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkATitle ) {
n . TextMarkATitle = r . ReplaceAllString ( n . TextMarkATitle , replacement )
}
}
}
2023-12-17 22:15:02 +08:00
if replaceTypes [ "aHref" ] {
2023-12-16 23:25:14 +08:00
if 0 == method {
if strings . Contains ( n . TextMarkAHref , keyword ) {
n . TextMarkAHref = strings . ReplaceAll ( n . TextMarkAHref , keyword , replacement )
}
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkAHref ) {
n . TextMarkAHref = r . ReplaceAllString ( n . TextMarkAHref , replacement )
}
2023-06-29 21:17:37 +08:00
}
2022-11-28 11:54:04 +08:00
}
2022-11-04 15:44:39 +08:00
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "em" ) {
if ! replaceTypes [ "em" ] {
return ast . WalkContinue
2022-11-28 11:54:04 +08:00
}
2023-12-16 23:25:14 +08:00
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "em" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "strong" ) {
if ! replaceTypes [ "strong" ] {
return ast . WalkContinue
2022-11-28 11:54:04 +08:00
}
2023-12-16 23:25:14 +08:00
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "strong" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "kbd" ) {
if ! replaceTypes [ "kbd" ] {
return ast . WalkContinue
2022-11-28 11:54:04 +08:00
}
2023-12-16 23:25:14 +08:00
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "kbd" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "mark" ) {
if ! replaceTypes [ "mark" ] {
return ast . WalkContinue
2022-11-28 11:54:04 +08:00
}
2023-12-16 23:25:14 +08:00
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "mark" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "s" ) {
if ! replaceTypes [ "s" ] {
return ast . WalkContinue
2023-06-29 21:17:37 +08:00
}
2023-12-16 23:25:14 +08:00
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "s" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "sub" ) {
if ! replaceTypes [ "sub" ] {
return ast . WalkContinue
}
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "sub" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "sup" ) {
if ! replaceTypes [ "sup" ] {
return ast . WalkContinue
}
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "sup" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "tag" ) {
if ! replaceTypes [ "tag" ] {
return ast . WalkContinue
}
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "tag" )
2023-12-16 23:25:14 +08:00
} else if n . IsTextMarkType ( "u" ) {
if ! replaceTypes [ "u" ] {
return ast . WalkContinue
}
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "u" )
2023-12-27 08:29:27 +08:00
} else if n . IsTextMarkType ( "inline-math" ) {
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "inlineMath" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
if 0 == method {
if strings . Contains ( n . TextMarkInlineMathContent , keyword ) {
n . TextMarkInlineMathContent = strings . ReplaceAll ( n . TextMarkInlineMathContent , keyword , replacement )
2023-06-29 21:17:37 +08:00
}
2023-12-16 23:25:14 +08:00
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkInlineMathContent ) {
2023-06-29 21:17:37 +08:00
n . TextMarkInlineMathContent = r . ReplaceAllString ( n . TextMarkInlineMathContent , replacement )
}
2023-12-16 23:25:14 +08:00
}
2023-12-27 08:29:27 +08:00
} else if n . IsTextMarkType ( "inline-memo" ) {
2023-12-17 22:15:02 +08:00
if ! replaceTypes [ "inlineMemo" ] {
2023-12-16 23:25:14 +08:00
return ast . WalkContinue
}
if 0 == method {
if strings . Contains ( n . TextMarkInlineMemoContent , keyword ) {
n . TextMarkInlineMemoContent = strings . ReplaceAll ( n . TextMarkInlineMemoContent , keyword , replacement )
2023-06-29 21:17:37 +08:00
}
2023-12-16 23:25:14 +08:00
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkInlineMemoContent ) {
n . TextMarkInlineMemoContent = r . ReplaceAllString ( n . TextMarkInlineMemoContent , replacement )
2023-06-29 21:17:37 +08:00
}
2022-11-28 11:54:04 +08:00
}
2023-12-29 13:25:00 +08:00
} else if n . IsTextMarkType ( "text" ) {
// Search and replace fails in some cases https://github.com/siyuan-note/siyuan/issues/10016
if ! replaceTypes [ "text" ] {
return ast . WalkContinue
}
2024-05-10 23:10:46 +08:00
replaceNodeTextMarkTextContent ( n , method , keyword , replacement , r , "text" )
2024-07-15 12:18:44 +08:00
} else if n . IsTextMarkType ( "block-ref" ) {
if ! replaceTypes [ "blockRef" ] {
return ast . WalkContinue
}
if 0 == method {
if strings . Contains ( n . TextMarkTextContent , keyword ) {
n . TextMarkTextContent = strings . ReplaceAll ( n . TextMarkTextContent , keyword , replacement )
n . TextMarkBlockRefSubtype = "s"
}
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkTextContent ) {
n . TextMarkTextContent = r . ReplaceAllString ( n . TextMarkTextContent , replacement )
n . TextMarkBlockRefSubtype = "s"
}
}
2022-11-28 11:54:04 +08:00
}
2022-09-18 20:07:38 +08:00
}
2023-06-29 21:17:37 +08:00
return ast . WalkContinue
} )
2022-05-26 15:18:53 +08:00
2024-04-20 10:02:44 +08:00
for _ , unlink := range unlinks {
unlink . Unlink ( )
}
2024-09-04 04:40:50 +03:00
if err = writeTreeUpsertQueue ( tree ) ; err != nil {
2023-06-29 21:17:37 +08:00
return
}
2022-05-26 15:18:53 +08:00
}
2023-06-29 18:14:36 +08:00
2023-06-29 18:44:45 +08:00
util . PushEndlessProgress ( fmt . Sprintf ( Conf . Language ( 206 ) , i + 1 , len ( ids ) ) )
2022-05-26 15:18:53 +08:00
}
2023-06-29 18:44:45 +08:00
for i , renameRoot := range renameRoots {
2022-06-13 19:59:14 +08:00
newTitle := renameRootTitles [ renameRoot . ID ]
RenameDoc ( renameRoot . Box , renameRoot . Path , newTitle )
2023-06-29 18:44:45 +08:00
2023-06-29 21:17:37 +08:00
util . PushEndlessProgress ( fmt . Sprintf ( Conf . Language ( 207 ) , i + 1 , len ( renameRoots ) ) )
2022-06-13 19:59:14 +08:00
}
2024-10-14 20:39:16 +08:00
sql . FlushQueue ( )
2024-09-08 23:51:26 +08:00
reloadTreeIDs = gulu . Str . RemoveDuplicatedElem ( reloadTreeIDs )
for _ , id := range reloadTreeIDs {
2024-09-11 17:22:16 +08:00
refreshProtyle ( id )
2022-05-26 15:18:53 +08:00
}
2024-09-08 23:51:26 +08:00
2024-10-14 20:39:16 +08:00
sql . FlushQueue ( )
2024-09-08 23:51:26 +08:00
util . PushClearProgress ( )
2022-05-26 15:18:53 +08:00
return
}
2024-05-10 23:10:46 +08:00
func replaceNodeTextMarkTextContent ( n * ast . Node , method int , keyword string , replacement string , r * regexp . Regexp , typ string ) {
2023-12-16 23:25:14 +08:00
if 0 == method {
2024-05-10 23:10:46 +08:00
if "tag" == typ {
keyword = strings . TrimPrefix ( keyword , "#" )
keyword = strings . TrimSuffix ( keyword , "#" )
}
2023-12-16 23:25:14 +08:00
if strings . Contains ( n . TextMarkTextContent , keyword ) {
n . TextMarkTextContent = strings . ReplaceAll ( n . TextMarkTextContent , keyword , replacement )
}
} else if 3 == method {
if nil != r && r . MatchString ( n . TextMarkTextContent ) {
n . TextMarkTextContent = r . ReplaceAllString ( n . TextMarkTextContent , replacement )
}
}
}
2024-04-20 10:02:44 +08:00
// replaceTextNode 替换文本节点为其他节点。
// Supports replacing text elements with other elements https://github.com/siyuan-note/siyuan/issues/11058
func replaceTextNode ( text * ast . Node , method int , keyword string , replacement string , r * regexp . Regexp , luteEngine * lute . Lute ) bool {
if 0 == method {
2024-09-19 23:56:54 +08:00
newContent := text . Tokens
if Conf . Search . CaseSensitive {
if bytes . Contains ( text . Tokens , [ ] byte ( keyword ) ) {
newContent = bytes . ReplaceAll ( text . Tokens , [ ] byte ( keyword ) , [ ] byte ( replacement ) )
}
} else {
// 当搜索结果中的文本元素包含大小写混合时替换失败
// Replace fails when search results contain mixed case in text elements https://github.com/siyuan-note/siyuan/issues/9171
keywords := strings . Split ( keyword , " " )
// keyword 可能是 "foo Foo" 使用空格分隔的大小写命中情况,这里统一转换小写后去重
2024-10-16 22:52:32 +08:00
if 0 < len ( keywords ) {
2024-09-19 23:56:54 +08:00
var lowerKeywords [ ] string
for _ , k := range keywords {
lowerKeywords = append ( lowerKeywords , strings . ToLower ( k ) )
}
lowerKeywords = gulu . Str . RemoveDuplicatedElem ( lowerKeywords )
keyword = strings . Join ( lowerKeywords , " " )
}
if bytes . Contains ( bytes . ToLower ( text . Tokens ) , [ ] byte ( keyword ) ) {
newContent = replaceCaseInsensitive ( text . Tokens , [ ] byte ( keyword ) , [ ] byte ( replacement ) )
}
}
if ! bytes . Equal ( newContent , text . Tokens ) {
2024-04-20 10:02:44 +08:00
tree := parse . Inline ( "" , newContent , luteEngine . ParseOptions )
if nil == tree . Root . FirstChild {
return false
}
parse . NestedInlines2FlattedSpans ( tree , false )
var replaceNodes [ ] * ast . Node
for rNode := tree . Root . FirstChild . FirstChild ; nil != rNode ; rNode = rNode . Next {
replaceNodes = append ( replaceNodes , rNode )
}
for _ , rNode := range replaceNodes {
text . InsertBefore ( rNode )
}
return true
}
} else if 3 == method {
if nil != r && r . MatchString ( string ( text . Tokens ) ) {
2024-05-17 21:50:35 +08:00
newContent := [ ] byte ( r . ReplaceAllString ( string ( text . Tokens ) , replacement ) )
2024-04-20 10:02:44 +08:00
tree := parse . Inline ( "" , newContent , luteEngine . ParseOptions )
if nil == tree . Root . FirstChild {
return false
}
var replaceNodes [ ] * ast . Node
2024-04-30 00:19:30 +08:00
for rNode := tree . Root . FirstChild . FirstChild ; nil != rNode ; rNode = rNode . Next {
2024-04-20 10:02:44 +08:00
replaceNodes = append ( replaceNodes , rNode )
}
for _ , rNode := range replaceNodes {
text . InsertBefore ( rNode )
}
return true
}
}
return false
}
2023-12-16 23:25:14 +08:00
// replaceNodeTokens rewrites n.Tokens in place.
//
// method 0 replaces every literal occurrence of keyword with replacement;
// method 3 applies the regular expression r (a nil r is a no-op). Any other
// method leaves n untouched.
func replaceNodeTokens(n *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp) {
	switch method {
	case 0:
		if needle := []byte(keyword); bytes.Contains(n.Tokens, needle) {
			n.Tokens = bytes.ReplaceAll(n.Tokens, needle, []byte(replacement))
		}
	case 3:
		if nil == r {
			return
		}
		if source := string(n.Tokens); r.MatchString(source) {
			n.Tokens = []byte(r.ReplaceAllString(source, replacement))
		}
	}
}
2022-12-02 23:38:57 +08:00
// FullTextSearchBlock 搜索内容块。
//
2022-12-09 12:08:07 +08:00
// method: 0: 关键字, 1: 查询语法, 2: SQL, 3: 正则表达式
// orderBy: 0: 按块类型( 默认) , 1: 按创建时间升序, 2: 按创建时间降序, 3: 按更新时间升序, 4: 按更新时间降序, 5: 按内容顺序( 仅在按文档分组时) , 6: 按相关度升序, 7: 按相关度降序
2022-12-02 23:38:57 +08:00
// groupBy: 0: 不分组, 1: 按文档分组
2023-06-29 18:14:36 +08:00
func FullTextSearchBlock ( query string , boxes , paths [ ] string , types map [ string ] bool , method , orderBy , groupBy , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount , pageCount int ) {
2024-01-14 11:42:28 +08:00
ret = [ ] * Block { }
if "" == query {
return
}
trimQuery := strings . TrimSpace ( query )
if "" != trimQuery {
query = trimQuery
}
2024-10-24 00:27:26 +08:00
var ignoreFilter string
if ignoreLines := getSearchIgnoreLines ( ) ; 0 < len ( ignoreLines ) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
buf := bytes . Buffer { }
for _ , line := range ignoreLines {
buf . WriteString ( " AND " )
buf . WriteString ( line )
}
ignoreFilter += buf . String ( )
}
2022-11-26 18:12:54 +08:00
beforeLen := 36
var blocks [ ] * Block
2024-05-26 17:41:35 +08:00
orderByClause := buildOrderBy ( query , method , orderBy )
2022-11-28 11:24:31 +08:00
switch method {
case 1 : // 查询语法
2024-10-24 00:27:26 +08:00
typeFilter := buildTypeFilter ( types )
2022-11-29 23:21:19 +08:00
boxFilter := buildBoxesFilter ( boxes )
pathFilter := buildPathsFilter ( paths )
2024-10-24 00:27:26 +08:00
blocks , matchedBlockCount , matchedRootCount = fullTextSearchByQuerySyntax ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderByClause , beforeLen , page , pageSize )
2022-11-28 11:24:31 +08:00
case 2 : // SQL
2023-06-29 18:14:36 +08:00
blocks , matchedBlockCount , matchedRootCount = searchBySQL ( query , beforeLen , page , pageSize )
2022-11-28 11:24:31 +08:00
case 3 : // 正则表达式
typeFilter := buildTypeFilter ( types )
2022-11-29 23:21:19 +08:00
boxFilter := buildBoxesFilter ( boxes )
pathFilter := buildPathsFilter ( paths )
2024-10-24 00:27:26 +08:00
blocks , matchedBlockCount , matchedRootCount = fullTextSearchByRegexp ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderByClause , beforeLen , page , pageSize )
2022-12-09 12:08:07 +08:00
default : // 关键字
2024-10-24 00:27:26 +08:00
typeFilter := buildTypeFilter ( types )
2022-11-29 23:21:19 +08:00
boxFilter := buildBoxesFilter ( boxes )
pathFilter := buildPathsFilter ( paths )
2024-10-24 00:27:26 +08:00
blocks , matchedBlockCount , matchedRootCount = fullTextSearchByKeyword ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderByClause , beforeLen , page , pageSize )
2022-11-26 18:12:54 +08:00
}
2023-04-21 10:03:05 +08:00
pageCount = ( matchedBlockCount + pageSize - 1 ) / pageSize
2022-11-26 18:12:54 +08:00
switch groupBy {
case 0 : // 不分组
ret = blocks
case 1 : // 按文档分组
rootMap := map [ string ] bool { }
var rootIDs [ ] string
2022-12-02 23:38:57 +08:00
contentSorts := map [ string ] int { }
2024-07-14 11:55:16 +08:00
var btsID [ ] string
for _ , b := range blocks {
btsID = append ( btsID , b . RootID )
}
btsID = gulu . Str . RemoveDuplicatedElem ( btsID )
bts := treenode . GetBlockTrees ( btsID )
2022-11-26 18:12:54 +08:00
for _ , b := range blocks {
if _ , ok := rootMap [ b . RootID ] ; ! ok {
rootMap [ b . RootID ] = true
rootIDs = append ( rootIDs , b . RootID )
2024-07-14 11:55:16 +08:00
tree , _ := loadTreeByBlockTree ( bts [ b . RootID ] )
2022-11-30 12:11:49 +08:00
if nil == tree {
continue
}
2022-12-02 23:38:57 +08:00
if 5 == orderBy { // 按内容顺序(仅在按文档分组时)
sort := 0
ast . Walk ( tree . Root , func ( n * ast . Node , entering bool ) ast . WalkStatus {
if ! entering || ! n . IsBlock ( ) {
return ast . WalkContinue
}
2022-11-30 12:11:49 +08:00
2022-12-02 23:38:57 +08:00
contentSorts [ n . ID ] = sort
sort ++
return ast . WalkContinue
} )
}
2022-11-26 18:12:54 +08:00
}
}
2022-11-30 12:11:49 +08:00
2022-11-26 18:12:54 +08:00
sqlRoots := sql . GetBlocks ( rootIDs )
roots := fromSQLBlocks ( & sqlRoots , "" , beforeLen )
for _ , root := range roots {
for _ , b := range blocks {
2022-12-02 23:38:57 +08:00
if 5 == orderBy { // 按内容顺序(仅在按文档分组时)
b . Sort = contentSorts [ b . ID ]
}
2022-11-26 18:12:54 +08:00
if b . RootID == root . ID {
root . Children = append ( root . Children , b )
}
}
2022-12-02 23:38:57 +08:00
switch orderBy {
case 1 : //按创建时间升序
sort . Slice ( root . Children , func ( i , j int ) bool { return root . Children [ i ] . Created < root . Children [ j ] . Created } )
case 2 : // 按创建时间降序
sort . Slice ( root . Children , func ( i , j int ) bool { return root . Children [ i ] . Created > root . Children [ j ] . Created } )
case 3 : // 按更新时间升序
sort . Slice ( root . Children , func ( i , j int ) bool { return root . Children [ i ] . Updated < root . Children [ j ] . Updated } )
case 4 : // 按更新时间降序
sort . Slice ( root . Children , func ( i , j int ) bool { return root . Children [ i ] . Updated > root . Children [ j ] . Updated } )
case 5 : // 按内容顺序(仅在按文档分组时)
sort . Slice ( root . Children , func ( i , j int ) bool { return root . Children [ i ] . Sort < root . Children [ j ] . Sort } )
default : // 按块类型(默认)
sort . Slice ( root . Children , func ( i , j int ) bool { return root . Children [ i ] . Sort < root . Children [ j ] . Sort } )
}
}
switch orderBy {
case 1 : //按创建时间升序
sort . Slice ( roots , func ( i , j int ) bool { return roots [ i ] . Created < roots [ j ] . Created } )
case 2 : // 按创建时间降序
sort . Slice ( roots , func ( i , j int ) bool { return roots [ i ] . Created > roots [ j ] . Created } )
case 3 : // 按更新时间升序
sort . Slice ( roots , func ( i , j int ) bool { return roots [ i ] . Updated < roots [ j ] . Updated } )
case 4 : // 按更新时间降序
sort . Slice ( roots , func ( i , j int ) bool { return roots [ i ] . Updated > roots [ j ] . Updated } )
case 5 : // 按内容顺序(仅在按文档分组时)
2022-12-09 12:08:07 +08:00
// 都是文档,不需要再次排序
case 6 , 7 : // 按相关度
// 已在 ORDER BY 中处理
2022-12-02 23:38:57 +08:00
default : // 按块类型(默认)
// 都是文档,不需要再次排序
2022-11-26 18:12:54 +08:00
}
ret = roots
default :
ret = blocks
2022-05-26 15:18:53 +08:00
}
2022-11-27 11:10:20 +08:00
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
2022-05-26 15:18:53 +08:00
return
}
2022-11-29 23:21:19 +08:00
// buildBoxesFilter builds the SQL condition that restricts a search to the
// given notebooks (boxes).
//
// It returns "" when boxes is empty, otherwise a fragment of the form
// " AND (box = 'a' OR box = 'b')". Single quotes inside a box ID are doubled so
// the value cannot terminate the SQL string literal it is embedded in.
func buildBoxesFilter(boxes []string) string {
	if 0 == len(boxes) {
		return ""
	}

	var builder strings.Builder
	builder.WriteString(" AND (")
	for i, box := range boxes {
		if 0 < i {
			builder.WriteString(" OR ")
		}
		builder.WriteString("box = '")
		builder.WriteString(strings.ReplaceAll(box, "'", "''"))
		builder.WriteString("'")
	}
	builder.WriteString(")")
	return builder.String()
}
// buildPathsFilter builds the SQL condition that restricts a search to blocks
// whose path starts with one of the given prefixes.
//
// It returns "" when paths is empty, otherwise a fragment of the form
// " AND (path LIKE '/a%' OR path LIKE '/b%')". Single quotes inside a prefix
// are doubled so the value cannot terminate the SQL string literal it is
// embedded in. LIKE wildcards inside a prefix are passed through unchanged.
func buildPathsFilter(paths []string) string {
	if 0 == len(paths) {
		return ""
	}

	var builder strings.Builder
	builder.WriteString(" AND (")
	for i, prefix := range paths {
		if 0 < i {
			builder.WriteString(" OR ")
		}
		builder.WriteString("path LIKE '")
		builder.WriteString(strings.ReplaceAll(prefix, "'", "''"))
		builder.WriteString("%'")
	}
	builder.WriteString(")")
	return builder.String()
}
2024-05-26 17:41:35 +08:00
// buildOrderBy returns the ORDER BY clause for the requested sort mode.
//
// orderBy 1-4 sort by creation or update time. orderBy 6 and 7 sort by rank,
// which is only used for keyword (method 0) and query syntax (method 1)
// searches; other methods fall back to the sort column
// (https://github.com/siyuan-note/siyuan/issues/7861). Every other orderBy
// value ranks exact and partial name/alias matches of query first, then sorts
// by sort and update time; single quotes in query are doubled before it is
// embedded.
func buildOrderBy(query string, method, orderBy int) string {
	rankable := 0 == method || 1 == method
	switch orderBy {
	case 1:
		return "ORDER BY created ASC"
	case 2:
		return "ORDER BY created DESC"
	case 3:
		return "ORDER BY updated ASC"
	case 4:
		return "ORDER BY updated DESC"
	case 6:
		if rankable {
			// Rank orders by descending relevance by default, so ascending
			// relevance is expressed with DESC.
			return "ORDER BY rank DESC"
		}
		return "ORDER BY sort DESC, updated DESC"
	case 7:
		if rankable {
			return "ORDER BY rank" // descending relevance is the default
		}
		return "ORDER BY sort ASC, updated DESC"
	}

	escaped := strings.ReplaceAll(query, "'", "''")
	return "ORDER BY CASE " +
		"WHEN name = '" + escaped + "' THEN 10 " +
		"WHEN alias = '" + escaped + "' THEN 20 " +
		"WHEN name LIKE '%" + escaped + "%' THEN 50 " +
		"WHEN alias LIKE '%" + escaped + "%' THEN 60 " +
		"ELSE 65535 END ASC, sort ASC, updated DESC"
}
2022-11-28 11:24:31 +08:00
func buildTypeFilter ( types map [ string ] bool ) string {
2022-05-26 15:18:53 +08:00
s := conf . NewSearch ( )
2024-09-04 04:40:50 +03:00
if err := copier . Copy ( s , Conf . Search ) ; err != nil {
2022-07-17 12:22:32 +08:00
logging . LogErrorf ( "copy search conf failed: %s" , err )
2022-05-26 15:18:53 +08:00
}
if nil != types {
s . Document = types [ "document" ]
s . Heading = types [ "heading" ]
s . List = types [ "list" ]
s . ListItem = types [ "listItem" ]
s . CodeBlock = types [ "codeBlock" ]
s . MathBlock = types [ "mathBlock" ]
s . Table = types [ "table" ]
s . Blockquote = types [ "blockquote" ]
s . SuperBlock = types [ "superBlock" ]
s . Paragraph = types [ "paragraph" ]
s . HTMLBlock = types [ "htmlBlock" ]
2023-01-19 01:16:13 +08:00
s . EmbedBlock = types [ "embedBlock" ]
2023-10-05 12:37:34 +08:00
s . DatabaseBlock = types [ "databaseBlock" ]
2024-03-22 11:47:18 +08:00
s . AudioBlock = types [ "audioBlock" ]
s . VideoBlock = types [ "videoBlock" ]
2024-03-26 16:34:10 +08:00
s . IFrameBlock = types [ "iframeBlock" ]
2024-03-22 11:47:18 +08:00
s . WidgetBlock = types [ "widgetBlock" ]
2022-05-26 15:18:53 +08:00
} else {
s . Document = Conf . Search . Document
s . Heading = Conf . Search . Heading
s . List = Conf . Search . List
s . ListItem = Conf . Search . ListItem
s . CodeBlock = Conf . Search . CodeBlock
s . MathBlock = Conf . Search . MathBlock
s . Table = Conf . Search . Table
s . Blockquote = Conf . Search . Blockquote
s . SuperBlock = Conf . Search . SuperBlock
s . Paragraph = Conf . Search . Paragraph
s . HTMLBlock = Conf . Search . HTMLBlock
2023-01-19 01:16:13 +08:00
s . EmbedBlock = Conf . Search . EmbedBlock
2023-10-05 12:37:34 +08:00
s . DatabaseBlock = Conf . Search . DatabaseBlock
2024-03-22 11:47:18 +08:00
s . AudioBlock = Conf . Search . AudioBlock
s . VideoBlock = Conf . Search . VideoBlock
s . IFrameBlock = Conf . Search . IFrameBlock
s . WidgetBlock = Conf . Search . WidgetBlock
2022-05-26 15:18:53 +08:00
}
return s . TypeFilter ( )
}
2023-06-29 18:14:36 +08:00
func searchBySQL ( stmt string , beforeLen , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount int ) {
2024-01-14 11:42:28 +08:00
stmt = filterQueryInvisibleChars ( stmt )
2023-02-13 11:01:23 +08:00
stmt = strings . TrimSpace ( stmt )
2023-04-21 10:46:55 +08:00
blocks := sql . SelectBlocksRawStmt ( stmt , page , pageSize )
2022-05-26 15:18:53 +08:00
ret = fromSQLBlocks ( & blocks , "" , beforeLen )
if 1 > len ( ret ) {
ret = [ ] * Block { }
2022-07-28 01:14:49 +08:00
return
2022-05-26 15:18:53 +08:00
}
2022-07-28 01:14:49 +08:00
stmt = strings . ToLower ( stmt )
2023-02-13 11:01:23 +08:00
if strings . HasPrefix ( stmt , "select a.* " ) { // 多个搜索关键字匹配文档 https://github.com/siyuan-note/siyuan/issues/7350
stmt = strings . ReplaceAll ( stmt , "select a.* " , "select COUNT(a.id) AS `matches`, COUNT(DISTINCT(a.root_id)) AS `docs` " )
} else {
stmt = strings . ReplaceAll ( stmt , "select * " , "select COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` " )
}
2023-04-21 10:46:55 +08:00
stmt = removeLimitClause ( stmt )
2023-05-04 10:11:29 +08:00
result , _ := sql . QueryNoLimit ( stmt )
2022-07-28 01:14:49 +08:00
if 1 > len ( ret ) {
return
}
matchedBlockCount = int ( result [ 0 ] [ "matches" ] . ( int64 ) )
matchedRootCount = int ( result [ 0 ] [ "docs" ] . ( int64 ) )
2022-05-26 15:18:53 +08:00
return
}
2023-04-21 10:46:55 +08:00
func removeLimitClause ( stmt string ) string {
parsedStmt , err := sqlparser . Parse ( stmt )
2024-09-04 04:40:50 +03:00
if err != nil {
2023-04-21 10:46:55 +08:00
return stmt
}
switch parsedStmt . ( type ) {
case * sqlparser . Select :
slct := parsedStmt . ( * sqlparser . Select )
if nil != slct . Limit {
slct . Limit = nil
}
stmt = sqlparser . String ( slct )
}
return stmt
}
2023-04-22 09:27:40 +08:00
func fullTextSearchRefBlock ( keyword string , beforeLen int , onlyDoc bool ) ( ret [ ] * Block ) {
2024-01-14 11:42:28 +08:00
keyword = filterQueryInvisibleChars ( keyword )
2022-05-26 15:18:53 +08:00
2024-04-03 11:12:48 +08:00
if id := extractID ( keyword ) ; "" != id {
ret , _ , _ = searchBySQL ( "SELECT * FROM `blocks` WHERE `id` = '" + id + "'" , 36 , 1 , 32 )
2022-05-26 15:18:53 +08:00
return
}
quotedKeyword := stringQuery ( keyword )
table := "blocks_fts" // 大小写敏感
if ! Conf . Search . CaseSensitive {
table = "blocks_fts_case_insensitive"
}
projections := "id, parent_id, root_id, hash, box, path, " +
2022-08-16 10:24:38 +08:00
"snippet(" + table + ", 6, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS hpath, " +
"snippet(" + table + ", 7, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS name, " +
"snippet(" + table + ", 8, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS alias, " +
"snippet(" + table + ", 9, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS memo, " +
2024-09-27 11:38:53 +08:00
"snippet(" + table + ", 10, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS tag, " +
2022-08-16 10:24:38 +08:00
"snippet(" + table + ", 11, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS content, " +
2022-05-26 15:18:53 +08:00
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
2023-04-17 22:47:25 +08:00
stmt := "SELECT " + projections + " FROM " + table + " WHERE " + table + " MATCH '" + columnFilter ( ) + ":(" + quotedKeyword + ")' AND type"
2023-04-22 09:27:40 +08:00
if onlyDoc {
2023-04-17 22:47:25 +08:00
stmt += " = 'd'"
} else {
stmt += " IN " + Conf . Search . TypeFilter ( )
}
2024-01-10 09:48:14 +08:00
if ignoreLines := getRefSearchIgnoreLines ( ) ; 0 < len ( ignoreLines ) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
2024-06-28 22:13:23 +08:00
buf := bytes . Buffer { }
2024-01-10 09:48:14 +08:00
for _ , line := range ignoreLines {
2024-06-28 22:13:23 +08:00
buf . WriteString ( " AND " )
buf . WriteString ( line )
2024-01-10 09:48:14 +08:00
}
2024-06-28 22:13:23 +08:00
stmt += buf . String ( )
2024-01-10 09:48:14 +08:00
}
2024-05-26 17:41:35 +08:00
orderBy := ` ORDER BY CASE
WHEN name = ' $ { keyword } ' THEN 10
WHEN alias = ' $ { keyword } ' THEN 20
WHEN memo = ' $ { keyword } ' THEN 30
WHEN content = ' $ { keyword } ' and type = 'd' THEN 40
WHEN content LIKE ' % $ { keyword } % ' and type = 'd' THEN 41
WHEN name LIKE ' % $ { keyword } % ' THEN 50
WHEN alias LIKE ' % $ { keyword } % ' THEN 60
WHEN content = ' $ { keyword } ' and type = 'h' THEN 70
WHEN content LIKE ' % $ { keyword } % ' and type = 'h' THEN 71
WHEN fcontent = ' $ { keyword } ' and type = 'i' THEN 80
WHEN fcontent LIKE ' % $ { keyword } % ' and type = 'i' THEN 81
WHEN memo LIKE ' % $ { keyword } % ' THEN 90
WHEN content LIKE ' % $ { keyword } % ' and type != 'i' and type != 'l' THEN 100
ELSE 65535 END ASC , sort ASC , length ASC `
2024-01-17 11:01:23 +08:00
orderBy = strings . ReplaceAll ( orderBy , "${keyword}" , strings . ReplaceAll ( keyword , "'" , "''" ) )
2022-05-26 15:18:53 +08:00
stmt += orderBy + " LIMIT " + strconv . Itoa ( Conf . Search . Limit )
2023-02-15 14:26:50 +08:00
blocks := sql . SelectBlocksRawStmtNoParse ( stmt , Conf . Search . Limit )
2022-05-26 15:18:53 +08:00
ret = fromSQLBlocks ( & blocks , "" , beforeLen )
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
return
}
2024-04-03 11:12:48 +08:00
func extractID ( content string ) ( ret string ) {
2024-04-03 11:13:48 +08:00
// Improve block ref search ID extraction https://github.com/siyuan-note/siyuan/issues/10848
2024-04-03 11:12:48 +08:00
if 22 > len ( content ) {
return
}
// 从第一个字符开始循环,直到找到一个合法的 ID 为止
for i := 0 ; i < len ( content ) - 21 ; i ++ {
if ast . IsNodeIDPattern ( content [ i : i + 22 ] ) {
ret = content [ i : i + 22 ]
return
}
}
return
}
2024-10-24 00:27:26 +08:00
func fullTextSearchByQuerySyntax ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy string , beforeLen , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount int ) {
2024-01-14 11:42:28 +08:00
query = filterQueryInvisibleChars ( query )
2023-01-17 22:16:14 +08:00
if ast . IsNodeIDPattern ( query ) {
2023-06-29 18:14:36 +08:00
ret , matchedBlockCount , matchedRootCount = searchBySQL ( "SELECT * FROM `blocks` WHERE `id` = '" + query + "'" , beforeLen , page , pageSize )
2022-07-28 01:14:49 +08:00
return
}
2024-10-24 00:27:26 +08:00
return fullTextSearchByFTS ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy , beforeLen , page , pageSize )
2022-12-02 17:30:16 +08:00
}
2022-07-28 01:14:49 +08:00
2024-10-24 00:27:26 +08:00
func fullTextSearchByKeyword ( query , boxFilter , pathFilter , typeFilter , ignoreFilter string , orderBy string , beforeLen , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount int ) {
2024-01-14 11:42:28 +08:00
query = filterQueryInvisibleChars ( query )
2023-01-17 22:16:14 +08:00
if ast . IsNodeIDPattern ( query ) {
2023-06-29 18:14:36 +08:00
ret , matchedBlockCount , matchedRootCount = searchBySQL ( "SELECT * FROM `blocks` WHERE `id` = '" + query + "'" , beforeLen , page , pageSize )
2022-12-02 17:30:16 +08:00
return
2022-11-21 10:19:56 +08:00
}
2024-10-27 10:39:20 +08:00
if 2 > len ( strings . Split ( query , " " ) ) {
return fullTextSearchByFTS ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy , beforeLen , page , pageSize )
}
2024-10-24 00:27:26 +08:00
return fullTextSearchByFTSWithRoot ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy , beforeLen , page , pageSize )
2022-12-02 17:30:16 +08:00
}
2022-11-21 10:19:56 +08:00
2024-10-24 00:27:26 +08:00
func fullTextSearchByRegexp ( exp , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy string , beforeLen , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount int ) {
2024-01-14 11:42:28 +08:00
exp = filterQueryInvisibleChars ( exp )
2022-12-02 17:30:16 +08:00
fieldFilter := fieldRegexp ( exp )
2022-12-31 12:13:18 +08:00
stmt := "SELECT * FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
2024-10-24 00:27:26 +08:00
stmt += boxFilter + pathFilter + ignoreFilter + " " + orderBy
2024-09-21 17:29:01 +08:00
regex := regexp . MustCompile ( exp )
blocks := sql . SelectBlocksRegex ( stmt , regex , Conf . Search . Name , Conf . Search . Alias , Conf . Search . Memo , Conf . Search . IAL , page , pageSize )
2022-12-02 17:30:16 +08:00
ret = fromSQLBlocks ( & blocks , "" , beforeLen )
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
2024-10-24 00:27:26 +08:00
matchedBlockCount , matchedRootCount = fullTextSearchCountByRegexp ( exp , boxFilter , pathFilter , typeFilter , ignoreFilter )
2022-12-02 17:30:16 +08:00
return
}
2024-10-24 00:27:26 +08:00
func fullTextSearchCountByRegexp ( exp , boxFilter , pathFilter , typeFilter , ignoreFilter string ) ( matchedBlockCount , matchedRootCount int ) {
2022-12-02 17:30:16 +08:00
fieldFilter := fieldRegexp ( exp )
2024-10-24 00:27:26 +08:00
stmt := "SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter + ignoreFilter
2022-11-29 23:21:19 +08:00
stmt += boxFilter + pathFilter
2023-05-04 10:11:29 +08:00
result , _ := sql . QueryNoLimit ( stmt )
2022-07-28 01:14:49 +08:00
if 1 > len ( result ) {
return
}
matchedBlockCount = int ( result [ 0 ] [ "matches" ] . ( int64 ) )
matchedRootCount = int ( result [ 0 ] [ "docs" ] . ( int64 ) )
return
}
2024-10-24 00:27:26 +08:00
func fullTextSearchByFTS ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy string , beforeLen , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount int ) {
2024-10-27 10:39:20 +08:00
start := time . Now ( )
2024-10-23 23:06:48 +08:00
query = stringQuery ( query )
2022-05-26 15:18:53 +08:00
table := "blocks_fts" // 大小写敏感
if ! Conf . Search . CaseSensitive {
table = "blocks_fts_case_insensitive"
}
projections := "id, parent_id, root_id, hash, box, path, " +
2024-03-24 09:42:11 +08:00
// Search result content snippet returns more text https://github.com/siyuan-note/siyuan/issues/10707
"snippet(" + table + ", 6, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 512) AS hpath, " +
"snippet(" + table + ", 7, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 512) AS name, " +
"snippet(" + table + ", 8, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 512) AS alias, " +
"snippet(" + table + ", 9, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 512) AS memo, " +
2024-09-27 11:38:53 +08:00
"snippet(" + table + ", 10, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 64) AS tag, " +
2024-03-24 09:42:11 +08:00
"snippet(" + table + ", 11, '" + search . SearchMarkLeft + "', '" + search . SearchMarkRight + "', '...', 512) AS content, " +
2022-05-26 15:18:53 +08:00
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
2023-02-15 13:05:07 +08:00
stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + columnFilter ( ) + ":(" + query + ")'"
stmt += ") AND type IN " + typeFilter
2024-10-24 00:27:26 +08:00
stmt += boxFilter + pathFilter + ignoreFilter + " " + orderBy
2023-04-21 09:34:35 +08:00
stmt += " LIMIT " + strconv . Itoa ( pageSize ) + " OFFSET " + strconv . Itoa ( ( page - 1 ) * pageSize )
2023-04-21 10:46:55 +08:00
blocks := sql . SelectBlocksRawStmt ( stmt , page , pageSize )
2022-05-26 15:18:53 +08:00
ret = fromSQLBlocks ( & blocks , "" , beforeLen )
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
2022-07-28 01:14:49 +08:00
2024-10-24 00:27:26 +08:00
matchedBlockCount , matchedRootCount = fullTextSearchCountByFTS ( query , boxFilter , pathFilter , typeFilter , ignoreFilter )
2024-10-27 10:39:20 +08:00
logging . LogInfof ( "time cost [fts]: %v" , time . Since ( start ) )
2022-11-28 11:24:31 +08:00
return
}
2024-10-24 00:27:26 +08:00
func fullTextSearchCountByFTS ( query , boxFilter , pathFilter , typeFilter , ignoreFilter string ) ( matchedBlockCount , matchedRootCount int ) {
2024-10-23 16:11:13 +08:00
table := "blocks_fts" // 大小写敏感
2023-05-19 10:18:25 +08:00
if ! Conf . Search . CaseSensitive {
table = "blocks_fts_case_insensitive"
}
2024-10-23 16:11:13 +08:00
stmt := "SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `" + table + "` WHERE (`" + table + "` MATCH '" + columnFilter ( ) + ":(" + query + ")'"
2023-05-19 10:18:25 +08:00
stmt += ") AND type IN " + typeFilter
2024-10-24 00:27:26 +08:00
stmt += boxFilter + pathFilter + ignoreFilter
2024-10-23 16:11:13 +08:00
result , _ := sql . QueryNoLimit ( stmt )
if 1 > len ( result ) {
return
}
matchedBlockCount = int ( result [ 0 ] [ "matches" ] . ( int64 ) )
matchedRootCount = int ( result [ 0 ] [ "docs" ] . ( int64 ) )
2023-05-19 10:18:25 +08:00
return
}
2024-10-24 00:27:26 +08:00
func fullTextSearchByFTSWithRoot ( query , boxFilter , pathFilter , typeFilter , ignoreFilter , orderBy string , beforeLen , page , pageSize int ) ( ret [ ] * Block , matchedBlockCount , matchedRootCount int ) {
2024-10-24 10:16:49 +08:00
start := time . Now ( )
2024-10-23 23:06:48 +08:00
query = strings . ReplaceAll ( query , "'" , "''" )
query = strings . ReplaceAll ( query , "\"" , "\"\"" )
2024-10-23 16:45:22 +08:00
keywords := strings . Split ( query , " " )
2024-10-26 23:03:57 +08:00
contentField := columnConcat ( )
2024-10-24 00:13:33 +08:00
var likeFilter string
2024-10-26 10:43:09 +08:00
orderByLike := "("
2024-10-23 16:45:22 +08:00
for i , keyword := range keywords {
2024-10-26 10:43:09 +08:00
likeFilter += "GROUP_CONCAT(" + contentField + ") LIKE '%" + keyword + "%'"
orderByLike += "(docContent LIKE '%" + keyword + "%')"
2024-10-23 16:45:22 +08:00
if i < len ( keywords ) - 1 {
likeFilter += " AND "
2024-10-26 10:43:09 +08:00
orderByLike += " + "
2024-10-23 16:45:22 +08:00
}
}
2024-10-26 10:43:09 +08:00
orderByLike += ")"
dMatchStmt := "SELECT root_id, MAX(CASE WHEN type = 'd' THEN (" + contentField + ") END) AS docContent" +
" FROM blocks WHERE type IN " + typeFilter + boxFilter + pathFilter + ignoreFilter +
" GROUP BY root_id HAVING " + likeFilter + "ORDER BY " + orderByLike + " DESC, MAX(updated) DESC"
cteStmt := "WITH docBlocks AS (" + dMatchStmt + ")"
likeFilter = strings . ReplaceAll ( likeFilter , "GROUP_CONCAT(" + contentField + ")" , "concatContent" )
2024-10-27 10:39:20 +08:00
limit := " LIMIT " + strconv . Itoa ( pageSize ) + " OFFSET " + strconv . Itoa ( ( page - 1 ) * pageSize )
2024-10-26 10:43:09 +08:00
selectStmt := cteStmt + "\nSELECT *, " +
2024-10-26 23:03:57 +08:00
"(" + contentField + ") AS concatContent, " +
2024-10-26 21:17:59 +08:00
"(SELECT COUNT(root_id) FROM docBlocks) AS docs, " +
"(CASE WHEN (root_id IN (SELECT root_id FROM docBlocks) AND (" + strings . ReplaceAll ( likeFilter , "concatContent" , contentField ) + ")) THEN 1 ELSE 0 END) AS blockSort" +
" FROM blocks WHERE type IN " + typeFilter + boxFilter + pathFilter + ignoreFilter +
2024-10-27 10:39:20 +08:00
" AND (id IN (SELECT root_id FROM docBlocks " + limit + ") OR" +
" (root_id IN (SELECT root_id FROM docBlocks" + limit + ") AND (" + likeFilter + ")))"
selectStmt += " " + strings . Replace ( orderBy , "END ASC, " , "END ASC, blockSort DESC, " , 1 )
result , _ := sql . QueryNoLimit ( selectStmt )
resultBlocks := sql . ToBlocks ( result )
2024-10-26 10:43:09 +08:00
if 0 < len ( resultBlocks ) {
matchedRootCount = int ( result [ 0 ] [ "docs" ] . ( int64 ) )
2024-10-26 12:39:10 +08:00
matchedBlockCount = matchedRootCount
2024-10-26 10:43:09 +08:00
}
2024-10-23 23:06:48 +08:00
2024-10-26 10:43:09 +08:00
keywords = gulu . Str . RemoveDuplicatedElem ( keywords )
terms := strings . Join ( keywords , search . TermSep )
ret = fromSQLBlocks ( & resultBlocks , terms , beforeLen )
2024-10-23 16:11:13 +08:00
if 1 > len ( ret ) {
ret = [ ] * Block { }
}
2024-10-27 10:39:20 +08:00
logging . LogInfof ( "time cost [like]: %v" , time . Since ( start ) )
2024-09-21 17:29:01 +08:00
return
}
2024-10-23 16:11:13 +08:00
// highlightByFTS returns the distinct pieces of text that the full-text query
// highlights in the content of the blocks of document id.
//
// At most 256 blocks of the document are inspected. The keywords are the
// substrings that highlight() wrapped in the search marks.
func highlightByFTS(query, typeFilter, id string) (ret []string) {
	const limit = 256

	table := "blocks_fts_case_insensitive"
	if Conf.Search.CaseSensitive {
		table = "blocks_fts"
	}

	// highlight renders one highlight() projection for the given FTS column index.
	highlight := func(column, alias string) string {
		return "highlight(" + table + ", " + column + ", '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS " + alias + ", "
	}
	projections := "id, parent_id, root_id, hash, box, path, " +
		highlight("6", "hpath") +
		highlight("7", "name") +
		highlight("8", "alias") +
		highlight("9", "memo") +
		highlight("10", "tag") +
		highlight("11", "content") +
		"fcontent, markdown, length, type, subtype, ial, sort, created, updated"

	stmt := "SELECT " + projections + " FROM " + table +
		" WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'" +
		") AND type IN " + typeFilter +
		" AND root_id = '" + id + "'" +
		" LIMIT " + strconv.Itoa(limit)

	for _, block := range sql.SelectBlocksRawStmt(stmt, 1, limit) {
		marked := gulu.Str.SubstringsBetween(block.Content, search.SearchMarkLeft, search.SearchMarkRight)
		if 0 == len(marked) {
			continue
		}
		ret = append(ret, marked...)
	}
	ret = gulu.Str.RemoveDuplicatedElem(ret)
	return
}
func highlightByRegexp ( query , typeFilter , id string ) ( ret [ ] string ) {
fieldFilter := fieldRegexp ( query )
stmt := "SELECT * FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
stmt += " AND root_id = '" + id + "'"
regex := regexp . MustCompile ( query )
sqlBlocks := sql . SelectBlocksRegex ( stmt , regex , Conf . Search . Name , Conf . Search . Alias , Conf . Search . Memo , Conf . Search . IAL , 1 , 256 )
for _ , block := range sqlBlocks {
keyword := gulu . Str . SubstringsBetween ( block . Content , search . SearchMarkLeft , search . SearchMarkRight )
if 0 < len ( keyword ) {
ret = append ( ret , keyword ... )
}
}
ret = gulu . Str . RemoveDuplicatedElem ( ret )
return
}
2022-06-08 00:39:18 +08:00
func markSearch ( text string , keyword string , beforeLen int ) ( marked string , score float64 ) {
2022-05-26 15:18:53 +08:00
if 0 == len ( keyword ) {
marked = text
2022-06-08 00:39:18 +08:00
2022-08-16 10:24:38 +08:00
if strings . Contains ( marked , search . SearchMarkLeft ) { // 使用 FTS snippet() 处理过高亮片段,这里简单替换后就返回
2022-12-28 17:07:59 +08:00
marked = util . EscapeHTML ( text )
2022-08-16 10:24:38 +08:00
marked = strings . ReplaceAll ( marked , search . SearchMarkLeft , "<mark>" )
marked = strings . ReplaceAll ( marked , search . SearchMarkRight , "</mark>" )
2022-06-08 00:39:18 +08:00
return
}
2022-08-16 10:24:38 +08:00
keywords := gulu . Str . SubstringsBetween ( marked , search . SearchMarkLeft , search . SearchMarkRight )
2022-06-23 19:35:59 +08:00
keywords = gulu . Str . RemoveDuplicatedElem ( keywords )
2022-06-08 00:39:18 +08:00
keyword = strings . Join ( keywords , search . TermSep )
2022-08-16 10:24:38 +08:00
marked = strings . ReplaceAll ( marked , search . SearchMarkLeft , "" )
marked = strings . ReplaceAll ( marked , search . SearchMarkRight , "" )
2022-06-08 00:39:18 +08:00
_ , marked = search . MarkText ( marked , keyword , beforeLen , Conf . Search . CaseSensitive )
2022-05-26 15:18:53 +08:00
return
}
2022-06-08 00:39:18 +08:00
pos , marked := search . MarkText ( text , keyword , beforeLen , Conf . Search . CaseSensitive )
2022-05-26 15:18:53 +08:00
if - 1 < pos {
if 0 == pos {
score = 1
}
score += float64 ( strings . Count ( marked , "<mark>" ) )
winkler := smetrics . JaroWinkler ( text , keyword , 0.7 , 4 )
score += winkler
}
score = - score // 分越小排序越靠前
return
}
// fromSQLBlocks converts every row in *sqlBlocks with fromSQLBlock, keeping the
// input order. terms and beforeLen are passed through unchanged. The result is
// nil when there are no rows; a nil row yields a nil entry in the result.
func fromSQLBlocks(sqlBlocks *[]*sql.Block, terms string, beforeLen int) (ret []*Block) {
	rows := *sqlBlocks
	for i := 0; i < len(rows); i++ {
		ret = append(ret, fromSQLBlock(rows[i], terms, beforeLen))
	}
	return ret
}
func fromSQLBlock ( sqlBlock * sql . Block , terms string , beforeLen int ) ( block * Block ) {
if nil == sqlBlock {
return
}
id := sqlBlock . ID
2024-03-24 10:51:19 +08:00
content := sqlBlock . Content
if 1 < strings . Count ( content , search . SearchMarkRight ) && strings . HasSuffix ( content , search . SearchMarkRight + "..." ) {
// 返回多个关键字命中时需要检查最后一个关键字是否被截断
firstKeyword := gulu . Str . SubStringBetween ( content , search . SearchMarkLeft , search . SearchMarkRight )
lastKeyword := gulu . Str . LastSubStringBetween ( content , search . SearchMarkLeft , search . SearchMarkRight )
if firstKeyword != lastKeyword {
// 如果第一个关键字和最后一个关键字不相同,说明最后一个关键字被截断了
// 此时需要将 content 中的最后一个关键字替换为完整的关键字
content = strings . TrimSuffix ( content , search . SearchMarkLeft + lastKeyword + search . SearchMarkRight + "..." )
content += search . SearchMarkLeft + firstKeyword + search . SearchMarkRight + "..."
}
}
content = util . EscapeHTML ( content ) // Search dialog XSS https://github.com/siyuan-note/siyuan/issues/8525
2022-06-08 00:39:18 +08:00
content , _ = markSearch ( content , terms , beforeLen )
2022-05-26 15:18:53 +08:00
content = maxContent ( content , 5120 )
2024-09-27 11:38:53 +08:00
tag , _ := markSearch ( sqlBlock . Tag , terms , beforeLen )
2022-08-28 09:00:01 +08:00
markdown := maxContent ( sqlBlock . Markdown , 5120 )
2024-04-12 21:55:53 +08:00
fContent := util . EscapeHTML ( sqlBlock . FContent ) // fContent 会用于和 content 对比,在反链计算时用于判断是否是列表项下第一个子块,所以也需要转义 https://github.com/siyuan-note/siyuan/issues/11001
2022-05-26 15:18:53 +08:00
block = & Block {
2022-08-28 10:43:56 +08:00
Box : sqlBlock . Box ,
2023-06-13 09:34:36 +08:00
Path : sqlBlock . Path ,
2022-08-28 10:43:56 +08:00
ID : id ,
RootID : sqlBlock . RootID ,
ParentID : sqlBlock . ParentID ,
Alias : sqlBlock . Alias ,
Name : sqlBlock . Name ,
Memo : sqlBlock . Memo ,
2024-09-27 11:38:53 +08:00
Tag : tag ,
2022-08-28 10:43:56 +08:00
Content : content ,
2024-04-12 21:55:53 +08:00
FContent : fContent ,
2022-08-28 10:43:56 +08:00
Markdown : markdown ,
Type : treenode . FromAbbrType ( sqlBlock . Type ) ,
SubType : sqlBlock . SubType ,
2022-11-30 12:11:49 +08:00
Sort : sqlBlock . Sort ,
2022-05-26 15:18:53 +08:00
}
if "" != sqlBlock . IAL {
block . IAL = map [ string ] string { }
ialStr := strings . TrimPrefix ( sqlBlock . IAL , "{:" )
ialStr = strings . TrimSuffix ( ialStr , "}" )
ial := parse . Tokens2IAL ( [ ] byte ( ialStr ) )
for _ , kv := range ial {
block . IAL [ kv [ 0 ] ] = kv [ 1 ]
}
}
2024-10-26 23:43:33 +08:00
hPath , _ := markSearch ( sqlBlock . HPath , "" , 18 )
2022-05-26 15:18:53 +08:00
if ! strings . HasPrefix ( hPath , "/" ) {
hPath = "/" + hPath
}
block . HPath = hPath
if "" != block . Name {
2022-06-08 00:39:18 +08:00
block . Name , _ = markSearch ( block . Name , terms , 256 )
2022-05-26 15:18:53 +08:00
}
if "" != block . Alias {
2022-06-08 00:39:18 +08:00
block . Alias , _ = markSearch ( block . Alias , terms , 256 )
2022-05-26 15:18:53 +08:00
}
if "" != block . Memo {
2022-06-08 00:39:18 +08:00
block . Memo , _ = markSearch ( block . Memo , terms , 256 )
2022-05-26 15:18:53 +08:00
}
return
}
func maxContent ( content string , maxLen int ) string {
2022-10-09 23:55:04 +08:00
idx := strings . Index ( content , "<mark>" )
if 128 < maxLen && maxLen <= idx {
head := bytes . Buffer { }
for i := 0 ; i < 512 ; i ++ {
r , size := utf8 . DecodeLastRuneInString ( content [ : idx ] )
head . WriteRune ( r )
idx -= size
if 64 < head . Len ( ) {
break
}
}
content = util . Reverse ( head . String ( ) ) + content [ idx : ]
}
2022-05-26 15:18:53 +08:00
if maxLen < utf8 . RuneCountInString ( content ) {
return gulu . Str . SubStr ( content , maxLen ) + "..."
}
return content
}
2022-11-28 11:24:31 +08:00
func fieldRegexp ( regexp string ) string {
buf := bytes . Buffer { }
2022-12-31 12:13:18 +08:00
buf . WriteString ( "(" )
2022-11-28 11:24:31 +08:00
buf . WriteString ( "content REGEXP '" )
buf . WriteString ( regexp )
2022-11-28 17:53:41 +08:00
buf . WriteString ( "'" )
2022-11-28 11:24:31 +08:00
if Conf . Search . Name {
buf . WriteString ( " OR name REGEXP '" )
buf . WriteString ( regexp )
2022-11-28 17:53:41 +08:00
buf . WriteString ( "'" )
2022-11-28 11:24:31 +08:00
}
if Conf . Search . Alias {
buf . WriteString ( " OR alias REGEXP '" )
buf . WriteString ( regexp )
2022-11-28 17:53:41 +08:00
buf . WriteString ( "'" )
2022-11-28 11:24:31 +08:00
}
if Conf . Search . Memo {
buf . WriteString ( " OR memo REGEXP '" )
buf . WriteString ( regexp )
2022-11-28 17:53:41 +08:00
buf . WriteString ( "'" )
2022-11-28 11:24:31 +08:00
}
2023-02-15 14:35:44 +08:00
if Conf . Search . IAL {
2022-11-28 11:24:31 +08:00
buf . WriteString ( " OR ial REGEXP '" )
buf . WriteString ( regexp )
2022-11-28 17:53:41 +08:00
buf . WriteString ( "'" )
2022-11-28 11:24:31 +08:00
}
buf . WriteString ( " OR tag REGEXP '" )
buf . WriteString ( regexp )
2022-12-31 12:13:18 +08:00
buf . WriteString ( "')" )
2022-11-28 11:24:31 +08:00
return buf . String ( )
}
2022-05-26 15:18:53 +08:00
func columnFilter ( ) string {
buf := bytes . Buffer { }
buf . WriteString ( "{content" )
if Conf . Search . Name {
buf . WriteString ( " name" )
}
if Conf . Search . Alias {
buf . WriteString ( " alias" )
}
if Conf . Search . Memo {
buf . WriteString ( " memo" )
}
2023-02-15 14:35:44 +08:00
if Conf . Search . IAL {
2023-02-15 14:26:50 +08:00
buf . WriteString ( " ial" )
}
2022-05-26 15:18:53 +08:00
buf . WriteString ( " tag}" )
return buf . String ( )
}
2024-10-26 23:03:57 +08:00
func columnConcat ( ) string {
buf := bytes . Buffer { }
buf . WriteString ( "content" )
if Conf . Search . Name {
buf . WriteString ( "||name" )
}
if Conf . Search . Alias {
buf . WriteString ( "||alias" )
}
if Conf . Search . Memo {
buf . WriteString ( "||memo" )
}
if Conf . Search . IAL {
buf . WriteString ( "||ial" )
}
buf . WriteString ( "||tag" )
return buf . String ( )
}
2022-05-26 15:18:53 +08:00
// stringQuery turns a raw query into a sequence of double-quoted terms.
//
// A query consisting only of whitespace is wrapped in double quotes as is.
// Otherwise every double quote and single quote is doubled, the query is split
// on single spaces and each piece is trimmed and quoted; the quoted pieces are
// joined with one space. Consecutive spaces therefore produce empty "" terms.
func stringQuery(query string) string {
	if strings.TrimSpace(query) == "" {
		return `"` + query + `"`
	}

	// Neither replacement produces the other's pattern, so a single pass gives
	// the same result as two successive replacements.
	escaped := strings.NewReplacer(`"`, `""`, "'", "''").Replace(query)

	var quoted bytes.Buffer
	for i, term := range strings.Split(escaped, " ") {
		if 0 < i {
			quoted.WriteByte(' ')
		}
		quoted.WriteByte('"')
		quoted.WriteString(strings.TrimSpace(term))
		quoted.WriteByte('"')
	}
	return quoted.String()
}
2022-10-13 01:00:59 +08:00
2022-10-26 09:51:09 +08:00
// markReplaceSpan 用于处理搜索高亮。
2022-11-29 22:42:15 +08:00
func markReplaceSpan ( n * ast . Node , unlinks * [ ] * ast . Node , keywords [ ] string , markSpanDataType string , luteEngine * lute . Lute ) bool {
if ast . NodeText == n . Type {
2024-05-17 11:20:24 +08:00
text := n . Content ( )
2024-01-19 17:53:21 +08:00
escapedText := util . EscapeHTML ( text )
2023-12-05 22:47:57 +08:00
escapedKeywords := make ( [ ] string , len ( keywords ) )
for i , keyword := range keywords {
escapedKeywords [ i ] = util . EscapeHTML ( keyword )
}
2024-01-19 17:53:21 +08:00
hText := search . EncloseHighlighting ( escapedText , escapedKeywords , search . GetMarkSpanStart ( markSpanDataType ) , search . GetMarkSpanEnd ( ) , Conf . Search . CaseSensitive , false )
if hText != escapedText {
text = hText
}
2022-11-29 22:42:15 +08:00
n . Tokens = gulu . Str . ToBytes ( text )
2023-02-16 10:41:02 +08:00
if bytes . Contains ( n . Tokens , [ ] byte ( search . MarkDataType ) ) {
2022-11-29 22:42:15 +08:00
linkTree := parse . Inline ( "" , n . Tokens , luteEngine . ParseOptions )
var children [ ] * ast . Node
for c := linkTree . Root . FirstChild . FirstChild ; nil != c ; c = c . Next {
children = append ( children , c )
}
for _ , c := range children {
n . InsertBefore ( c )
}
* unlinks = append ( * unlinks , n )
return true
2022-10-20 00:58:54 +08:00
}
2022-11-29 22:42:15 +08:00
} else if ast . NodeTextMark == n . Type {
2022-12-28 16:00:40 +08:00
// 搜索结果高亮支持大部分行级元素 https://github.com/siyuan-note/siyuan/issues/6745
2023-05-19 10:18:25 +08:00
2022-12-28 16:00:40 +08:00
if n . IsTextMarkType ( "inline-math" ) || n . IsTextMarkType ( "inline-memo" ) {
return false
}
2024-05-17 11:20:24 +08:00
var text string
if n . IsTextMarkType ( "code" ) {
// code 在前面的 n.
for i , k := range keywords {
keywords [ i ] = html . EscapeString ( k )
}
text = n . TextMarkTextContent
} else {
text = n . Content ( )
}
2023-02-16 10:41:02 +08:00
startTag := search . GetMarkSpanStart ( markSpanDataType )
2023-04-25 11:16:48 +08:00
text = search . EncloseHighlighting ( text , keywords , startTag , search . GetMarkSpanEnd ( ) , Conf . Search . CaseSensitive , false )
2023-02-16 10:41:02 +08:00
if strings . Contains ( text , search . MarkDataType ) {
dataType := search . GetMarkSpanStart ( n . TextMarkType + " " + search . MarkDataType )
2022-12-28 16:00:40 +08:00
text = strings . ReplaceAll ( text , startTag , dataType )
tokens := gulu . Str . ToBytes ( text )
linkTree := parse . Inline ( "" , tokens , luteEngine . ParseOptions )
var children [ ] * ast . Node
for c := linkTree . Root . FirstChild . FirstChild ; nil != c ; c = c . Next {
if ast . NodeText == c . Type {
c . Type = ast . NodeTextMark
c . TextMarkType = n . TextMarkType
c . TextMarkTextContent = string ( c . Tokens )
2023-01-03 22:57:43 +08:00
if n . IsTextMarkType ( "a" ) {
c . TextMarkAHref , c . TextMarkATitle = n . TextMarkAHref , n . TextMarkATitle
2023-02-10 15:21:20 +08:00
} else if treenode . IsBlockRef ( n ) {
2023-01-03 22:57:43 +08:00
c . TextMarkBlockRefID = n . TextMarkBlockRefID
c . TextMarkBlockRefSubtype = n . TextMarkBlockRefSubtype
2023-02-10 15:21:20 +08:00
} else if treenode . IsFileAnnotationRef ( n ) {
2023-01-03 22:57:43 +08:00
c . TextMarkFileAnnotationRefID = n . TextMarkFileAnnotationRefID
}
} else if ast . NodeTextMark == c . Type {
if n . IsTextMarkType ( "a" ) {
c . TextMarkAHref , c . TextMarkATitle = n . TextMarkAHref , n . TextMarkATitle
2023-02-10 15:21:20 +08:00
} else if treenode . IsBlockRef ( n ) {
2023-01-03 22:57:43 +08:00
c . TextMarkBlockRefID = n . TextMarkBlockRefID
c . TextMarkBlockRefSubtype = n . TextMarkBlockRefSubtype
2023-02-10 15:21:20 +08:00
} else if treenode . IsFileAnnotationRef ( n ) {
2023-01-03 22:57:43 +08:00
c . TextMarkFileAnnotationRefID = n . TextMarkFileAnnotationRefID
}
2022-12-28 16:00:40 +08:00
}
children = append ( children , c )
if nil != n . Next && ast . NodeKramdownSpanIAL == n . Next . Type {
c . KramdownIAL = n . KramdownIAL
ial := & ast . Node { Type : ast . NodeKramdownSpanIAL , Tokens : n . Next . Tokens }
children = append ( children , ial )
}
}
for _ , c := range children {
n . InsertBefore ( c )
}
* unlinks = append ( * unlinks , n )
return true
}
2022-10-20 00:58:54 +08:00
}
2022-10-26 09:51:09 +08:00
return false
2022-10-20 00:58:54 +08:00
}
2022-10-26 09:51:09 +08:00
// markReplaceSpanWithSplit 用于处理虚拟引用和反链提及高亮。
func markReplaceSpanWithSplit ( text string , keywords [ ] string , replacementStart , replacementEnd string ) ( ret string ) {
2023-02-24 10:04:25 +08:00
// 虚拟引用和反链提及关键字按最长匹配优先 https://github.com/siyuan-note/siyuan/issues/7465
sort . Slice ( keywords , func ( i , j int ) bool { return len ( keywords [ i ] ) > len ( keywords [ j ] ) } )
2023-04-25 11:16:48 +08:00
tmp := search . EncloseHighlighting ( text , keywords , replacementStart , replacementEnd , Conf . Search . CaseSensitive , true )
2022-11-16 10:34:02 +08:00
parts := strings . Split ( tmp , replacementEnd )
buf := bytes . Buffer { }
for i := 0 ; i < len ( parts ) ; i ++ {
if i >= len ( parts ) - 1 {
buf . WriteString ( parts [ i ] )
break
2022-10-13 01:00:59 +08:00
}
2022-11-16 10:34:02 +08:00
if nextPart := parts [ i + 1 ] ; 0 < len ( nextPart ) && lex . IsASCIILetter ( nextPart [ 0 ] ) {
// 取消已经高亮的部分
part := strings . ReplaceAll ( parts [ i ] , replacementStart , "" )
buf . WriteString ( part )
continue
2022-10-14 22:32:13 +08:00
}
2022-10-17 22:21:57 +08:00
2022-11-16 10:34:02 +08:00
buf . WriteString ( parts [ i ] )
buf . WriteString ( replacementEnd )
2022-10-17 22:21:57 +08:00
}
2022-11-16 10:34:02 +08:00
ret = buf . String ( )
2022-10-17 22:21:57 +08:00
return
2022-10-13 01:00:59 +08:00
}
2024-01-10 09:48:14 +08:00
var (
searchIgnoreLastModified int64
searchIgnore [ ] string
searchIgnoreLock = sync . Mutex { }
)
func getSearchIgnoreLines ( ) ( ret [ ] string ) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
now := time . Now ( ) . UnixMilli ( )
if now - searchIgnoreLastModified < 30 * 1000 {
return searchIgnore
}
searchIgnoreLock . Lock ( )
defer searchIgnoreLock . Unlock ( )
searchIgnoreLastModified = now
searchIgnorePath := filepath . Join ( util . DataDir , ".siyuan" , "searchignore" )
err := os . MkdirAll ( filepath . Dir ( searchIgnorePath ) , 0755 )
2024-09-04 04:40:50 +03:00
if err != nil {
2024-01-10 09:48:14 +08:00
return
}
if ! gulu . File . IsExist ( searchIgnorePath ) {
2024-09-04 04:40:50 +03:00
if err = gulu . File . WriteFileSafer ( searchIgnorePath , nil , 0644 ) ; err != nil {
2024-01-10 09:48:14 +08:00
logging . LogErrorf ( "create searchignore [%s] failed: %s" , searchIgnorePath , err )
return
}
}
data , err := os . ReadFile ( searchIgnorePath )
2024-09-04 04:40:50 +03:00
if err != nil {
2024-01-10 09:48:14 +08:00
logging . LogErrorf ( "read searchignore [%s] failed: %s" , searchIgnorePath , err )
return
}
dataStr := string ( data )
dataStr = strings . ReplaceAll ( dataStr , "\r\n" , "\n" )
ret = strings . Split ( dataStr , "\n" )
ret = gulu . Str . RemoveDuplicatedElem ( ret )
if 0 < len ( ret ) && "" == ret [ 0 ] {
ret = ret [ 1 : ]
}
searchIgnore = nil
for _ , line := range ret {
searchIgnore = append ( searchIgnore , line )
}
return
}
var (
refSearchIgnoreLastModified int64
refSearchIgnore [ ] string
refSearchIgnoreLock = sync . Mutex { }
)
func getRefSearchIgnoreLines ( ) ( ret [ ] string ) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
now := time . Now ( ) . UnixMilli ( )
if now - refSearchIgnoreLastModified < 30 * 1000 {
return refSearchIgnore
}
refSearchIgnoreLock . Lock ( )
defer refSearchIgnoreLock . Unlock ( )
refSearchIgnoreLastModified = now
searchIgnorePath := filepath . Join ( util . DataDir , ".siyuan" , "refsearchignore" )
err := os . MkdirAll ( filepath . Dir ( searchIgnorePath ) , 0755 )
2024-09-04 04:40:50 +03:00
if err != nil {
2024-01-10 09:48:14 +08:00
return
}
if ! gulu . File . IsExist ( searchIgnorePath ) {
2024-09-04 04:40:50 +03:00
if err = gulu . File . WriteFileSafer ( searchIgnorePath , nil , 0644 ) ; err != nil {
2024-01-10 09:48:14 +08:00
logging . LogErrorf ( "create refsearchignore [%s] failed: %s" , searchIgnorePath , err )
return
}
}
data , err := os . ReadFile ( searchIgnorePath )
2024-09-04 04:40:50 +03:00
if err != nil {
2024-01-10 09:48:14 +08:00
logging . LogErrorf ( "read refsearchignore [%s] failed: %s" , searchIgnorePath , err )
return
}
dataStr := string ( data )
dataStr = strings . ReplaceAll ( dataStr , "\r\n" , "\n" )
ret = strings . Split ( dataStr , "\n" )
ret = gulu . Str . RemoveDuplicatedElem ( ret )
if 0 < len ( ret ) && "" == ret [ 0 ] {
ret = ret [ 1 : ]
}
refSearchIgnore = nil
for _ , line := range ret {
refSearchIgnore = append ( refSearchIgnore , line )
}
return
}
2024-01-14 11:42:28 +08:00
// filterQueryInvisibleChars removes invisible characters from query with
// gulu.Str.RemoveInvisible while shielding one space character from that
// removal: it is swapped for a placeholder beforehand and the placeholder is
// swapped back for a space afterwards.
//
// NOTE(review): the placeholder name suggests the shielded character is the
// full-width (ideographic) space U+3000 — confirm the code points of both
// space literals below.
func filterQueryInvisibleChars(query string) string {
	const placeholder = "_@full_width_space@_"

	shielded := strings.ReplaceAll(query, " ", placeholder)
	cleaned := gulu.Str.RemoveInvisible(shielded)
	return strings.ReplaceAll(cleaned, placeholder, " ")
}
2024-09-19 23:56:54 +08:00
// replaceCaseInsensitive returns a copy of input in which every occurrence of
// old, compared case-insensitively, is replaced with new.
//
// Both old and new are treated literally: regular expression metacharacters in
// old are quoted, and "$" sequences in new are inserted as written instead of
// being expanded as capture group references. As with bytes.ReplaceAll, an
// empty old matches at every position.
func replaceCaseInsensitive(input, old, new []byte) []byte {
	re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(string(old)))
	if err != nil {
		// The quoted pattern can only be rejected when old is not valid UTF-8.
		// Case folding has no meaning for such bytes, so fall back to an exact
		// byte replacement instead of panicking.
		return bytes.ReplaceAll(input, old, new)
	}
	return []byte(re.ReplaceAllLiteralString(string(input), string(new)))
}