2023-06-24 20:39:55 +08:00
// SiYuan - Refactor your thinking
2022-05-26 15:18:53 +08:00
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package model
import (
2023-02-10 14:28:10 +08:00
"bytes"
2022-05-26 15:18:53 +08:00
"fmt"
2024-01-21 21:27:50 +08:00
"io/fs"
2023-02-10 14:28:10 +08:00
"path/filepath"
2023-01-25 20:21:53 +08:00
"runtime"
2023-02-04 14:58:35 +08:00
"runtime/debug"
2022-05-26 15:18:53 +08:00
"strings"
2023-01-25 20:21:53 +08:00
"sync"
2022-05-26 15:18:53 +08:00
"time"
2024-04-24 19:51:15 +08:00
"github.com/88250/go-humanize"
2023-02-10 14:28:10 +08:00
"github.com/88250/gulu"
"github.com/88250/lute/ast"
2024-04-26 18:23:21 +08:00
"github.com/88250/lute/editor"
2023-05-15 14:56:12 +08:00
"github.com/88250/lute/html"
2022-05-26 15:18:53 +08:00
"github.com/88250/lute/parse"
2023-01-25 20:21:53 +08:00
"github.com/panjf2000/ants/v2"
2022-09-16 10:47:54 +08:00
"github.com/siyuan-note/eventbus"
2023-02-10 14:28:10 +08:00
"github.com/siyuan-note/filelock"
2022-07-17 12:22:32 +08:00
"github.com/siyuan-note/logging"
2024-04-09 22:26:18 +08:00
"github.com/siyuan-note/siyuan/kernel/av"
2022-05-26 15:18:53 +08:00
"github.com/siyuan-note/siyuan/kernel/cache"
"github.com/siyuan-note/siyuan/kernel/filesys"
"github.com/siyuan-note/siyuan/kernel/sql"
2023-01-25 20:21:53 +08:00
"github.com/siyuan-note/siyuan/kernel/task"
2022-05-26 15:18:53 +08:00
"github.com/siyuan-note/siyuan/kernel/treenode"
"github.com/siyuan-note/siyuan/kernel/util"
)
2024-01-05 20:54:46 +08:00
// UpsertIndexes collects the .sy files named by paths and hands them to
// upsertIndexes.
//
// A path ending in "/" is treated as a directory and expanded with
// listSyFiles; a path ending in ".sy" is taken as-is; any other path is
// ignored. Duplicates are removed before the hand-off.
func UpsertIndexes(paths []string) {
	var targets []string
	for _, candidate := range paths {
		switch {
		case strings.HasSuffix(candidate, "/"):
			targets = append(targets, listSyFiles(candidate)...)
		case strings.HasSuffix(candidate, ".sy"):
			targets = append(targets, candidate)
		}
	}
	upsertIndexes(gulu.Str.RemoveDuplicatedElem(targets))
}
// RemoveIndexes collects the .sy files named by paths and hands them to
// removeIndexes.
//
// A path ending in "/" is treated as a directory and expanded with
// listSyFiles; a path ending in ".sy" is taken as-is; any other path is
// ignored. Duplicates are removed before the hand-off.
func RemoveIndexes(paths []string) {
	var targets []string
	for _, candidate := range paths {
		switch {
		case strings.HasSuffix(candidate, "/"):
			targets = append(targets, listSyFiles(candidate)...)
		case strings.HasSuffix(candidate, ".sy"):
			targets = append(targets, candidate)
		}
	}
	removeIndexes(gulu.Str.RemoveDuplicatedElem(targets))
}
func listSyFiles ( dir string ) ( ret [ ] string ) {
dirPath := filepath . Join ( util . DataDir , dir )
2024-11-21 10:59:29 +08:00
err := filelock . Walk ( dirPath , func ( path string , d fs . DirEntry , err error ) error {
2024-09-04 04:40:50 +03:00
if err != nil {
2024-01-05 20:54:46 +08:00
logging . LogWarnf ( "walk dir [%s] failed: %s" , dirPath , err )
return err
}
if d . IsDir ( ) {
return nil
}
if strings . HasSuffix ( path , ".sy" ) {
p := filepath . ToSlash ( strings . TrimPrefix ( path , util . DataDir ) )
ret = append ( ret , p )
}
return nil
} )
2024-09-04 04:40:50 +03:00
if err != nil {
2024-01-05 20:54:46 +08:00
logging . LogWarnf ( "walk dir [%s] failed: %s" , dirPath , err )
}
return
}
2023-01-23 10:44:22 +08:00
func ( box * Box ) Unindex ( ) {
2023-02-07 09:20:32 +08:00
task . AppendTask ( task . DatabaseIndex , unindex , box . ID )
2024-05-12 23:04:00 +08:00
go func ( ) {
2024-10-14 20:39:16 +08:00
sql . FlushQueue ( )
2024-05-12 23:04:00 +08:00
ResetVirtualBlockRefCache ( )
} ( )
2023-01-23 10:44:22 +08:00
}
func unindex ( boxID string ) {
ids := treenode . RemoveBlockTreesByBoxID ( boxID )
RemoveRecentDoc ( ids )
2023-01-23 18:30:52 +08:00
sql . DeleteBoxQueue ( boxID )
2023-01-23 10:44:22 +08:00
}
2023-01-23 11:48:35 +08:00
func ( box * Box ) Index ( ) {
2024-05-11 17:09:19 +08:00
task . AppendTask ( task . DatabaseIndexRef , removeBoxRefs , box . ID )
2023-02-07 09:20:32 +08:00
task . AppendTask ( task . DatabaseIndex , index , box . ID )
2023-01-23 18:30:52 +08:00
task . AppendTask ( task . DatabaseIndexRef , IndexRefs )
2024-05-12 23:04:00 +08:00
go func ( ) {
2024-10-14 20:39:16 +08:00
sql . FlushQueue ( )
2024-05-12 23:04:00 +08:00
ResetVirtualBlockRefCache ( )
} ( )
2023-01-23 10:44:22 +08:00
}
2024-05-11 17:09:19 +08:00
// removeBoxRefs queues deletion of the refs that belong to the notebook
// identified by boxID.
func removeBoxRefs(boxID string) {
	sql.DeleteBoxRefsQueue(boxID)
}
2023-01-23 11:48:35 +08:00
func index ( boxID string ) {
2023-01-23 10:44:22 +08:00
box := Conf . Box ( boxID )
if nil == box {
return
}
2022-05-26 15:18:53 +08:00
util . SetBootDetails ( "Listing files..." )
files := box . ListFiles ( "/" )
boxLen := len ( Conf . GetOpenedBoxes ( ) )
if 1 > boxLen {
boxLen = 1
}
2023-12-08 13:05:50 +08:00
bootProgressPart := int32 ( 30.0 / float64 ( boxLen ) / float64 ( len ( files ) ) )
2022-05-26 15:18:53 +08:00
2023-01-23 11:48:35 +08:00
start := time . Now ( )
2023-02-10 14:28:10 +08:00
luteEngine := util . NewLute ( )
2023-12-11 00:34:18 +08:00
var treeCount int
var treeSize int64
lock := sync . Mutex { }
2023-05-15 14:56:12 +08:00
util . PushStatusBar ( fmt . Sprintf ( "[" + html . EscapeString ( box . Name ) + "] " + Conf . Language ( 64 ) , len ( files ) ) )
2022-05-26 15:18:53 +08:00
2023-01-25 20:21:53 +08:00
poolSize := runtime . NumCPU ( )
if 4 < poolSize {
poolSize = 4
}
waitGroup := & sync . WaitGroup { }
2024-04-09 22:26:18 +08:00
var avNodes [ ] * ast . Node
2023-01-25 20:21:53 +08:00
p , _ := ants . NewPoolWithFunc ( poolSize , func ( arg interface { } ) {
defer waitGroup . Done ( )
file := arg . ( * FileInfo )
2023-12-11 00:34:18 +08:00
lock . Lock ( )
treeSize += file . size
treeCount ++
i := treeCount
lock . Unlock ( )
2023-01-23 11:48:35 +08:00
tree , err := filesys . LoadTree ( box . ID , file . path , luteEngine )
2024-09-04 04:40:50 +03:00
if err != nil {
2023-01-23 11:48:35 +08:00
logging . LogErrorf ( "read box [%s] tree [%s] failed: %s" , box . ID , file . path , err )
2023-01-25 20:21:53 +08:00
return
2022-05-26 15:18:53 +08:00
}
docIAL := parse . IAL2MapUnEsc ( tree . Root . KramdownIAL )
2023-01-23 11:20:05 +08:00
if "" == docIAL [ "updated" ] { // 早期的数据可能没有 updated 属性,这里进行订正
2023-01-09 00:22:18 +08:00
updated := util . TimeFromID ( tree . Root . ID )
tree . Root . SetIALAttr ( "updated" , updated )
docIAL [ "updated" ] = updated
2024-09-28 17:38:50 +08:00
if _ , writeErr := filesys . WriteTree ( tree ) ; nil != writeErr {
2023-01-23 18:44:19 +08:00
logging . LogErrorf ( "write tree [%s] failed: %s" , tree . Path , writeErr )
}
2023-01-09 00:22:18 +08:00
}
2024-04-09 22:26:18 +08:00
lock . Lock ( )
avNodes = append ( avNodes , tree . Root . ChildrenByType ( ast . NodeAttributeView ) ... )
lock . Unlock ( )
2023-01-23 11:48:35 +08:00
cache . PutDocIAL ( file . path , docIAL )
2023-01-25 20:46:17 +08:00
treenode . IndexBlockTree ( tree )
2024-03-15 22:53:37 +08:00
sql . IndexTreeQueue ( tree )
2022-09-19 23:46:43 +08:00
util . IncBootProgress ( bootProgressPart , fmt . Sprintf ( Conf . Language ( 92 ) , util . ShortPathForBootingDisplay ( tree . Path ) ) )
2022-05-26 15:18:53 +08:00
if 1 < i && 0 == i % 64 {
2023-12-11 00:14:58 +08:00
util . PushStatusBar ( fmt . Sprintf ( Conf . Language ( 88 ) , i , ( len ( files ) ) - i ) )
2022-05-26 15:18:53 +08:00
}
2023-01-25 20:21:53 +08:00
} )
for _ , file := range files {
if file . isdir || ! strings . HasSuffix ( file . name , ".sy" ) {
continue
}
waitGroup . Add ( 1 )
2023-12-11 00:14:58 +08:00
invokeErr := p . Invoke ( file )
if nil != invokeErr {
logging . LogErrorf ( "invoke [%s] failed: %s" , file . path , invokeErr )
continue
}
2022-05-26 15:18:53 +08:00
}
2023-01-25 20:21:53 +08:00
waitGroup . Wait ( )
p . Release ( )
2022-05-26 15:18:53 +08:00
2024-04-09 22:26:18 +08:00
// 关联数据库和块
av . BatchUpsertBlockRel ( avNodes )
2022-05-26 15:18:53 +08:00
box . UpdateHistoryGenerated ( ) // 初始化历史生成时间为当前时间
end := time . Now ( )
elapsed := end . Sub ( start ) . Seconds ( )
2024-04-24 20:10:08 +08:00
logging . LogInfof ( "rebuilt database for notebook [%s] in [%.2fs], tree [count=%d, size=%s]" , box . ID , elapsed , treeCount , humanize . BytesCustomCeil ( uint64 ( treeSize ) , 2 ) )
2023-02-04 14:58:35 +08:00
debug . FreeOSMemory ( )
2022-05-26 15:18:53 +08:00
return
}
func IndexRefs ( ) {
start := time . Now ( )
util . SetBootDetails ( "Resolving refs..." )
2023-01-23 18:47:23 +08:00
util . PushStatusBar ( Conf . Language ( 54 ) )
2022-05-26 15:18:53 +08:00
util . SetBootDetails ( "Indexing refs..." )
2023-01-23 18:30:52 +08:00
2023-02-10 14:28:10 +08:00
var defBlockIDs [ ] string
luteEngine := util . NewLute ( )
boxes := Conf . GetOpenedBoxes ( )
for _ , box := range boxes {
pages := pagedPaths ( filepath . Join ( util . DataDir , box . ID ) , 32 )
for _ , paths := range pages {
for _ , treeAbsPath := range paths {
data , readErr := filelock . ReadFile ( treeAbsPath )
if nil != readErr {
logging . LogWarnf ( "get data [path=%s] failed: %s" , treeAbsPath , readErr )
2022-05-26 15:18:53 +08:00
continue
}
2023-02-10 14:28:10 +08:00
if ! bytes . Contains ( data , [ ] byte ( "TextMarkBlockRefID" ) ) && ! bytes . Contains ( data , [ ] byte ( "TextMarkFileAnnotationRefID" ) ) {
2022-05-26 15:18:53 +08:00
continue
}
2023-02-10 14:28:10 +08:00
p := filepath . ToSlash ( strings . TrimPrefix ( treeAbsPath , filepath . Join ( util . DataDir , box . ID ) ) )
tree , parseErr := filesys . LoadTreeByData ( data , box . ID , p , luteEngine )
if nil != parseErr {
logging . LogWarnf ( "parse json to tree [%s] failed: %s" , treeAbsPath , parseErr )
2022-05-26 15:18:53 +08:00
continue
}
2023-02-10 14:28:10 +08:00
ast . Walk ( tree . Root , func ( n * ast . Node , entering bool ) ast . WalkStatus {
if ! entering {
return ast . WalkContinue
}
2023-02-10 15:21:20 +08:00
if treenode . IsBlockRef ( n ) || treenode . IsFileAnnotationRef ( n ) {
defBlockIDs = append ( defBlockIDs , tree . Root . ID )
2023-02-10 14:28:10 +08:00
}
return ast . WalkContinue
} )
}
}
}
2022-05-26 15:18:53 +08:00
2023-02-10 14:28:10 +08:00
defBlockIDs = gulu . Str . RemoveDuplicatedElem ( defBlockIDs )
2022-05-26 15:18:53 +08:00
2023-02-10 14:28:10 +08:00
i := 0
size := len ( defBlockIDs )
if 0 < size {
2023-12-08 13:05:50 +08:00
bootProgressPart := int32 ( 10.0 / float64 ( size ) )
2023-02-10 14:28:10 +08:00
for _ , defBlockID := range defBlockIDs {
2024-03-10 23:27:13 +08:00
defTree , loadErr := LoadTreeByBlockID ( defBlockID )
2023-02-10 14:28:10 +08:00
if nil != loadErr {
continue
}
util . IncBootProgress ( bootProgressPart , "Indexing ref " + defTree . ID )
2024-05-13 21:30:48 +08:00
sql . UpdateRefsTreeQueue ( defTree )
2023-02-10 14:28:10 +08:00
if 1 < i && 0 == i % 64 {
util . PushStatusBar ( fmt . Sprintf ( Conf . Language ( 55 ) , i ) )
2022-05-26 15:18:53 +08:00
}
2023-02-10 14:28:10 +08:00
i ++
2022-05-26 15:18:53 +08:00
}
}
2023-02-10 14:28:10 +08:00
logging . LogInfof ( "resolved refs [%d] in [%dms]" , size , time . Now ( ) . Sub ( start ) . Milliseconds ( ) )
2023-01-23 18:53:08 +08:00
util . PushStatusBar ( fmt . Sprintf ( Conf . Language ( 55 ) , i ) )
2022-05-26 15:18:53 +08:00
}
2024-05-10 16:20:02 +08:00
// indexEmbedBlockLock is held by autoIndexEmbedBlock for its whole run, so
// that only one run executes at a time.
var indexEmbedBlockLock sync.Mutex
2023-01-26 23:30:29 +08:00
// IndexEmbedBlockJob 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
func IndexEmbedBlockJob ( ) {
2024-05-10 16:20:02 +08:00
task . AppendTaskWithTimeout ( task . DatabaseIndexEmbedBlock , 30 * time . Second , autoIndexEmbedBlock )
2023-01-26 12:34:57 +08:00
}
2024-05-10 16:20:02 +08:00
func autoIndexEmbedBlock ( ) {
indexEmbedBlockLock . Lock ( )
defer indexEmbedBlockLock . Unlock ( )
embedBlocks := sql . QueryEmptyContentEmbedBlocks ( )
2023-01-26 12:34:57 +08:00
for i , embedBlock := range embedBlocks {
2024-04-26 18:23:21 +08:00
markdown := strings . TrimSpace ( embedBlock . Markdown )
markdown = strings . TrimPrefix ( markdown , "{{" )
stmt := strings . TrimSuffix ( markdown , "}}" )
// 嵌入块的 Markdown 内容需要反转义
stmt = html . UnescapeString ( stmt )
stmt = strings . ReplaceAll ( stmt , editor . IALValEscNewLine , "\n" )
// 需要移除首尾的空白字符以判断是否具有 //!js 标记
stmt = strings . TrimSpace ( stmt )
if strings . HasPrefix ( stmt , "//!js" ) {
// https://github.com/siyuan-note/siyuan/issues/9648
2023-11-24 17:37:34 +08:00
// js 嵌入块不支持自动索引,由前端主动调用 /api/search/updateEmbedBlock 接口更新内容 https://github.com/siyuan-note/siyuan/issues/9736
2023-11-18 12:03:22 +08:00
continue
}
2023-02-01 09:53:40 +08:00
if ! strings . Contains ( strings . ToLower ( stmt ) , "select" ) {
continue
}
2023-01-26 12:34:57 +08:00
queryResultBlocks := sql . SelectBlocksRawStmtNoParse ( stmt , 102400 )
for _ , block := range queryResultBlocks {
embedBlock . Content += block . Content
}
if "" == embedBlock . Content {
embedBlock . Content = "no query result"
}
2023-01-31 17:28:47 +08:00
sql . UpdateBlockContentQueue ( embedBlock )
2023-01-26 12:34:57 +08:00
if 63 <= i { // 一次任务中最多处理 64 个嵌入块,防止卡顿
break
}
}
}
func updateEmbedBlockContent ( embedBlockID string , queryResultBlocks [ ] * EmbedBlock ) {
embedBlock := sql . GetBlock ( embedBlockID )
if nil == embedBlock {
return
}
2023-01-30 09:48:16 +08:00
embedBlock . Content = "" // 嵌入块每查询一次多一个结果 https://github.com/siyuan-note/siyuan/issues/7196
2023-01-26 12:34:57 +08:00
for _ , block := range queryResultBlocks {
embedBlock . Content += block . Block . Markdown
}
if "" == embedBlock . Content {
embedBlock . Content = "no query result"
}
2023-01-31 17:28:47 +08:00
sql . UpdateBlockContentQueue ( embedBlock )
2023-01-26 12:34:57 +08:00
}
2022-09-16 10:47:54 +08:00
func init ( ) {
2023-06-15 10:15:13 +08:00
subscribeSQLEvents ( )
}
func subscribeSQLEvents ( ) {
2024-01-05 20:54:46 +08:00
// 使用下面的 EvtSQLInsertBlocksFTS 就可以了
2023-01-26 00:11:06 +08:00
//eventbus.Subscribe(eventbus.EvtSQLInsertBlocks, func(context map[string]interface{}, current, total, blockCount int, hash string) {
// if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
// // Android/iOS 端不显示数据索引和搜索索引状态提示 https://github.com/siyuan-note/siyuan/issues/6392
// return
// }
//
// msg := fmt.Sprintf(Conf.Language(89), current, total, blockCount, hash)
// util.SetBootDetails(msg)
// util.ContextPushMsg(context, msg)
//})
2023-01-26 00:47:14 +08:00
eventbus . Subscribe ( eventbus . EvtSQLInsertBlocksFTS , func ( context map [ string ] interface { } , blockCount int , hash string ) {
2024-11-22 20:15:47 +08:00
if util . ContainerAndroid == util . Container || util . ContainerIOS == util . Container || util . ContainerHarmony == util . Container {
2022-10-28 22:28:22 +08:00
// Android/iOS 端不显示数据索引和搜索索引状态提示 https://github.com/siyuan-note/siyuan/issues/6392
return
}
2023-02-18 14:37:54 +08:00
current := context [ "current" ] . ( int )
2023-01-26 00:47:14 +08:00
total := context [ "total" ]
2023-01-26 00:11:06 +08:00
msg := fmt . Sprintf ( Conf . Language ( 90 ) , current , total , blockCount , hash )
2022-09-16 10:47:54 +08:00
util . SetBootDetails ( msg )
2022-09-16 11:04:52 +08:00
util . ContextPushMsg ( context , msg )
2022-09-16 10:47:54 +08:00
} )
2023-01-26 17:50:21 +08:00
eventbus . Subscribe ( eventbus . EvtSQLDeleteBlocks , func ( context map [ string ] interface { } , rootID string ) {
2024-11-22 20:15:47 +08:00
if util . ContainerAndroid == util . Container || util . ContainerIOS == util . Container || util . ContainerHarmony == util . Container {
2023-01-26 17:50:21 +08:00
return
}
2023-02-18 14:37:54 +08:00
current := context [ "current" ] . ( int )
2023-01-26 17:50:21 +08:00
total := context [ "total" ]
msg := fmt . Sprintf ( Conf . Language ( 93 ) , current , total , rootID )
util . SetBootDetails ( msg )
util . ContextPushMsg ( context , msg )
} )
2024-03-18 21:02:40 +08:00
eventbus . Subscribe ( eventbus . EvtSQLUpdateBlocksHPaths , func ( context map [ string ] interface { } , blockCount int , hash string ) {
2024-11-22 20:15:47 +08:00
if util . ContainerAndroid == util . Container || util . ContainerIOS == util . Container || util . ContainerHarmony == util . Container {
2024-03-18 21:02:40 +08:00
return
}
current := context [ "current" ] . ( int )
total := context [ "total" ]
msg := fmt . Sprintf ( Conf . Language ( 234 ) , current , total , blockCount , hash )
util . SetBootDetails ( msg )
util . ContextPushMsg ( context , msg )
} )
2023-02-16 18:42:19 +08:00
eventbus . Subscribe ( eventbus . EvtSQLInsertHistory , func ( context map [ string ] interface { } ) {
2024-11-22 20:15:47 +08:00
if util . ContainerAndroid == util . Container || util . ContainerIOS == util . Container || util . ContainerHarmony == util . Container {
2023-02-16 18:42:19 +08:00
return
}
2023-02-18 14:37:54 +08:00
current := context [ "current" ] . ( int )
2023-02-16 18:42:19 +08:00
total := context [ "total" ]
msg := fmt . Sprintf ( Conf . Language ( 191 ) , current , total )
util . SetBootDetails ( msg )
util . ContextPushMsg ( context , msg )
} )
2023-08-04 12:05:29 +08:00
eventbus . Subscribe ( eventbus . EvtSQLInsertAssetContent , func ( context map [ string ] interface { } ) {
2024-11-22 20:15:47 +08:00
if util . ContainerAndroid == util . Container || util . ContainerIOS == util . Container || util . ContainerHarmony == util . Container {
2023-08-04 12:05:29 +08:00
return
}
current := context [ "current" ] . ( int )
total := context [ "total" ]
msg := fmt . Sprintf ( Conf . Language ( 217 ) , current , total )
util . SetBootDetails ( msg )
util . ContextPushMsg ( context , msg )
} )
2024-06-27 21:29:31 +08:00
eventbus . Subscribe ( eventbus . EvtSQLIndexChanged , func ( ) {
Conf . DataIndexState = 1
Conf . Save ( )
} )
eventbus . Subscribe ( eventbus . EvtSQLIndexFlushed , func ( ) {
Conf . DataIndexState = 0
Conf . Save ( )
} )
2022-09-16 10:47:54 +08:00
}