diff --git a/kernel/cache/asset.go b/kernel/cache/asset.go index 3af22e8e4..2f1181703 100644 --- a/kernel/cache/asset.go +++ b/kernel/cache/asset.go @@ -51,6 +51,14 @@ func RemoveAsset(path string) { delete(assetsCache, path) } +func ExistAsset(path string) (ret bool) { + assetsLock.Lock() + defer assetsLock.Unlock() + + _, ret = assetsCache[path] + return +} + func LoadAssets() { defer logging.Recover() diff --git a/kernel/job/cron.go b/kernel/job/cron.go index ea377c158..a6206877e 100644 --- a/kernel/job/cron.go +++ b/kernel/job/cron.go @@ -41,8 +41,8 @@ func StartCron() { go every(10*time.Minute, model.FixIndexJob) go every(10*time.Minute, model.IndexEmbedBlockJob) go every(10*time.Minute, model.CacheVirtualBlockRefJob) - go every(12*time.Second, model.OCRAssetsJob) - go every(12*time.Second, model.FlushAssetsTextsJob) + go every(30*time.Second, model.OCRAssetsJob) + go every(30*time.Second, model.FlushAssetsTextsJob) go every(30*time.Second, model.HookDesktopUIProcJob) } diff --git a/kernel/model/ocr.go b/kernel/model/ocr.go index 067a32232..f131716f9 100644 --- a/kernel/model/ocr.go +++ b/kernel/model/ocr.go @@ -21,7 +21,7 @@ func OCRAssetsJob() { return } - task.AppendTaskWithTimeout(task.OCRImage, 7*time.Second, autoOCRAssets) + task.AppendTaskWithTimeout(task.OCRImage, 30*time.Second, autoOCRAssets) } func autoOCRAssets() { @@ -40,7 +40,7 @@ func autoOCRAssets() { if "" != text { util.AssetsTextsChanged = true } - if 4 <= i { // 一次任务中最多处理 4 张图片,防止卡顿 + if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源 break } } diff --git a/kernel/sql/block.go b/kernel/sql/block.go index 0a022a11a..6f44549cc 100644 --- a/kernel/sql/block.go +++ b/kernel/sql/block.go @@ -20,6 +20,9 @@ import ( "database/sql" "github.com/siyuan-note/siyuan/kernel/cache" + "github.com/siyuan-note/siyuan/kernel/filesys" + "github.com/siyuan-note/siyuan/kernel/treenode" + "github.com/siyuan-note/siyuan/kernel/util" ) type Block struct { @@ -88,3 +91,41 @@ func updateBlockContent(tx *sql.Tx, block *Block) (err error) { putBlockCache(block) return } + +func indexNode(tx *sql.Tx, id string) (err error) { + bt := treenode.GetBlockTree(id) + if nil == bt { + return + } + + luteEngine := util.NewLute() + tree, _ := filesys.LoadTree(bt.BoxID, bt.Path, luteEngine) + if nil == tree { + return + } + + node := treenode.GetNodeInTree(tree, id) + if nil == node { + return + } + + content := treenode.NodeStaticContent(node, nil, true, indexAssetPath) + stmt := "UPDATE blocks SET content = ? WHERE id = ?" + if err = execStmtTx(tx, stmt, content, id); nil != err { + tx.Rollback() + return + } + stmt = "UPDATE blocks_fts SET content = ? WHERE id = ?" + if err = execStmtTx(tx, stmt, content, id); nil != err { + tx.Rollback() + return + } + if !caseSensitive { + stmt = "UPDATE blocks_fts_case_insensitive SET content = ? WHERE id = ?" + if err = execStmtTx(tx, stmt, content, id); nil != err { + tx.Rollback() + return + } + } + return +} diff --git a/kernel/sql/database.go b/kernel/sql/database.go index 40c6cef2f..995788bb1 100644 --- a/kernel/sql/database.go +++ b/kernel/sql/database.go @@ -798,9 +798,18 @@ func buildBlockFromNode(n *ast.Node, tree *parse.Tree) (block *Block, attributes length = utf8.RuneCountInString(fcontent) } else if n.IsContainerBlock() { markdown = treenode.ExportNodeStdMd(n, luteEngine) + + if !treenode.IsNodeOCRed(n) { + IndexNodeQueue(n.ID) + } content = treenode.NodeStaticContent(n, nil, true, indexAssetPath) fc := treenode.FirstLeafBlock(n) + + if !treenode.IsNodeOCRed(fc) { + IndexNodeQueue(fc.ID) + } fcontent = treenode.NodeStaticContent(fc, nil, true, false) + parentID = n.Parent.ID // 将标题块作为父节点 if h := heading(n); nil != h { @@ -809,7 +818,13 @@ func buildBlockFromNode(n *ast.Node, tree *parse.Tree) (block *Block, attributes length = utf8.RuneCountInString(fcontent) } else { markdown = treenode.ExportNodeStdMd(n, luteEngine) + + if !treenode.IsNodeOCRed(n) { + IndexNodeQueue(n.ID) + } + content = treenode.NodeStaticContent(n, nil, true, indexAssetPath) + parentID = n.Parent.ID // 将标题块作为父节点 if h := heading(n); nil != h { diff --git a/kernel/sql/queue.go b/kernel/sql/queue.go index b419826ed..edcd5f40d 100644 --- a/kernel/sql/queue.go +++ b/kernel/sql/queue.go @@ -51,6 +51,7 @@ type dbQueueOperation struct { box string // delete_box/delete_box_refs/index renameTree *parse.Tree // rename/rename_sub_tree block *Block // update_block_content + id string // index_node removeAssetHashes []string // delete_assets } @@ -191,6 +192,8 @@ func execOp(op *dbQueueOperation, tx *sql.Tx, context map[string]interface{}) (e err = updateBlockContent(tx, op.block) case "delete_assets": err = deleteAssetsByHashes(tx, op.removeAssetHashes) + case "index_node": + err = indexNode(tx, op.id) default: msg := fmt.Sprintf("unknown operation [%s]", op.action) logging.LogErrorf(msg) @@ -199,6 +202,20 @@ func execOp(op *dbQueueOperation, tx *sql.Tx, context map[string]interface{}) (e return } +func IndexNodeQueue(id string) { + dbQueueLock.Lock() + defer dbQueueLock.Unlock() + + newOp := &dbQueueOperation{id: id, inQueueTime: time.Now(), action: "index_node"} + for i, op := range operationQueue { + if "index_node" == op.action && op.id == id { + operationQueue[i] = newOp + return + } + } + operationQueue = append(operationQueue, newOp) +} + func BatchRemoveAssetsQueue(hashes []string) { if 1 > len(hashes) { return diff --git a/kernel/treenode/node.go b/kernel/treenode/node.go index b230912af..e28f209d0 100644 --- a/kernel/treenode/node.go +++ b/kernel/treenode/node.go @@ -137,6 +137,32 @@ func ExportNodeStdMd(node *ast.Node, luteEngine *lute.Lute) string { return markdown } +func IsNodeOCRed(node *ast.Node) (ret bool) { + ret = true + ast.Walk(node, func(n *ast.Node, entering bool) ast.WalkStatus { + if !entering { + return ast.WalkContinue + } + + if ast.NodeImage == n.Type { + linkDest := n.ChildByType(ast.NodeLinkDest) + if nil != linkDest { + linkDestStr := linkDest.TokensStr() + if !cache.ExistAsset(linkDestStr) { + return ast.WalkContinue + } + + if !util.ExistsAssetText(linkDestStr) { + ret = false + return ast.WalkStop + } + } + } + return ast.WalkContinue + }) + return +} + func NodeStaticContent(node *ast.Node, excludeTypes []string, includeTextMarkATitleURL, includeAssetPath bool) string { if nil == node { return "" diff --git a/kernel/util/tesseract.go b/kernel/util/tesseract.go index 589fd9b3c..9f56c19c9 100644 --- a/kernel/util/tesseract.go +++ b/kernel/util/tesseract.go @@ -52,27 +52,32 @@ func SetAssetText(asset, text string) { AssetsTextsChanged = true } -func GetAssetText(asset string, force bool) string { +func ExistsAssetText(asset string) (ret bool) { + AssetsTextsLock.Lock() + _, ret = AssetsTexts[asset] + AssetsTextsLock.Unlock() + return +} + +func GetAssetText(asset string, force bool) (ret string) { if !force { AssetsTextsLock.Lock() - ret, ok := AssetsTexts[asset] + ret = AssetsTexts[asset] AssetsTextsLock.Unlock() - if ok { - return ret - } + return } assetsPath := GetDataAssetsAbsPath() assetAbsPath := strings.TrimPrefix(asset, "assets") assetAbsPath = filepath.Join(assetsPath, assetAbsPath) - ret := Tesseract(assetAbsPath) + ret = Tesseract(assetAbsPath) AssetsTextsLock.Lock() AssetsTexts[asset] = ret AssetsTextsLock.Unlock() if "" != ret { AssetsTextsChanged = true } - return ret + return } func IsTesseractExtractable(p string) bool {