From 81800a029fef1683a5d8e5012f51861a70c2ae09 Mon Sep 17 00:00:00 2001
From: Daniel <845765@qq.com>
Date: Fri, 11 Aug 2023 00:01:31 +0800
Subject: [PATCH] :sparkles: Support for searching asset content
https://github.com/siyuan-note/siyuan/issues/8874
---
kernel/model/asset_content.go | 175 +++++++++++++++++++++++++++++-
kernel/sql/asset_content_query.go | 138 +++++++++++++++++++++++
2 files changed, 309 insertions(+), 4 deletions(-)
create mode 100644 kernel/sql/asset_content_query.go
diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go
index cee8245a0..031e85713 100644
--- a/kernel/model/asset_content.go
+++ b/kernel/model/asset_content.go
@@ -21,6 +21,7 @@ import (
"io/fs"
"os"
"path/filepath"
+ "strconv"
"strings"
"sync"
@@ -36,6 +37,172 @@ import (
"github.com/xuri/excelize/v2"
)
+// FullTextSearchAssetContent 搜索资源文件内容。
+//
+// method:0:关键字,1:查询语法,2:SQL,3:正则表达式
+// orderBy: 0:相关度(默认),1:按更新时间升序,2:按更新时间降序
+func FullTextSearchAssetContent(query string, types map[string]bool, method, orderBy, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount, pageCount int) {
+ query = strings.TrimSpace(query)
+ beforeLen := 36
+ var blocks []*Block
+ orderByClause := buildAssetContentOrderBy(orderBy)
+ switch method {
+ case 1: // 查询语法
+ filter := buildAssetContentTypeFilter(types)
+ blocks, matchedRootCount = fullTextSearchAssetContentByQuerySyntax(query, filter, orderByClause, beforeLen, page, pageSize)
+ case 2: // SQL
+ blocks, matchedRootCount = searchAssetContentBySQL(query, beforeLen, page, pageSize)
+ case 3: // 正则表达式
+ typeFilter := buildAssetContentTypeFilter(types)
+ blocks, matchedRootCount = fullTextSearchAssetContentByRegexp(query, typeFilter, orderByClause, beforeLen, page, pageSize)
+ default: // 关键字
+ filter := buildAssetContentTypeFilter(types)
+ blocks, matchedRootCount = fullTextSearchAssetContentByKeyword(query, filter, orderByClause, beforeLen, page, pageSize)
+ }
+ pageCount = (matchedRootCount + pageSize - 1) / pageSize
+
+ if 1 > len(ret) {
+ ret = []*Block{}
+ }
+ return
+}
+
+// fullTextSearchAssetContentByQuerySyntax strips invisible characters from
+// query and hands it, unmodified otherwise, to the FTS search so that the
+// caller's own query syntax is used as the MATCH expression.
+func fullTextSearchAssetContentByQuerySyntax(query, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) {
+	cleaned := gulu.Str.RemoveInvisible(query)
+	ret, matchedAssetsCount = fullTextSearchAssetContentByFTS(cleaned, typeFilter, orderBy, beforeLen, page, pageSize)
+	return
+}
+
+// fullTextSearchAssetContentByKeyword strips invisible characters from query,
+// passes the result through stringQuery and runs the FTS search with it.
+func fullTextSearchAssetContentByKeyword(query, typeFilter string, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) {
+	keyword := stringQuery(gulu.Str.RemoveInvisible(query))
+	ret, matchedAssetsCount = fullTextSearchAssetContentByFTS(keyword, typeFilter, orderBy, beforeLen, page, pageSize)
+	return
+}
+
+// fullTextSearchAssetContentByRegexp returns one page of assets whose name or
+// content matches the regular expression exp, restricted to the extensions in
+// typeFilter and sorted by the orderBy clause.
+//
+// page is 1-based: the statement uses OFFSET (page-1)*pageSize. ret is never
+// nil. matchedAssetsCount comes from a separate COUNT query.
+func fullTextSearchAssetContentByRegexp(exp, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) {
+	exp = gulu.Str.RemoveInvisible(exp)
+	offset := (page - 1) * pageSize
+	stmt := "SELECT * FROM `asset_contents_fts_case_insensitive` WHERE " + assetContentFieldRegexp(exp) +
+		" AND ext IN " + typeFilter +
+		" " + orderBy +
+		" LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa(offset)
+
+	rows := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
+	if ret = fromSQLBlocks(&rows, "", beforeLen); 0 == len(ret) {
+		ret = []*Block{}
+	}
+
+	matchedAssetsCount = fullTextSearchAssetContentCountByRegexp(exp, typeFilter)
+	return
+}
+
+// assetContentFieldRegexp builds the SQL condition that matches exp, as a
+// regular expression, against either the name or the content column.
+//
+// exp is embedded in single-quoted SQL string literals, so every single quote
+// in it is doubled; otherwise a pattern containing a quote would terminate the
+// literal early and corrupt (or inject into) the statement.
+func assetContentFieldRegexp(exp string) string {
+	escaped := strings.ReplaceAll(exp, "'", "''")
+	return "(name REGEXP '" + escaped + "' OR content REGEXP '" + escaped + "')"
+}
+
+// fullTextSearchAssetContentCountByRegexp counts the assets whose name or
+// content matches the regular expression exp and whose extension is in
+// typeFilter.
+//
+// It queries the same table, columns and condition as
+// fullTextSearchAssetContentByRegexp so that the count agrees with the page of
+// results that function returns. It yields 0 when the query fails or returns
+// no usable row.
+func fullTextSearchAssetContentCountByRegexp(exp, typeFilter string) (matchedAssetsCount int) {
+	table := "asset_contents_fts_case_insensitive"
+	fieldFilter := assetContentFieldRegexp(exp)
+	stmt := "SELECT COUNT(path) AS `assets` FROM `" + table + "` WHERE " + fieldFilter + " AND ext IN " + typeFilter
+	result, err := sql.QueryNoLimit(stmt)
+	if nil != err || 1 > len(result) {
+		return
+	}
+	// Checked assertion: an unexpected column type must not panic the search.
+	if count, ok := result[0]["assets"].(int64); ok {
+		matchedAssetsCount = int(count)
+	}
+	return
+}
+
+// fullTextSearchAssetContentByFTS runs query as a full-text MATCH expression
+// against the name and content columns of the asset content table and returns
+// one page of results.
+//
+// typeFilter is a parenthesized SQL list of extensions and is applied to the
+// ext column, the same column the projection and the regexp search use.
+// orderBy is a complete ORDER BY clause. page is 1-based: the statement uses
+// OFFSET (page-1)*pageSize. ret is never nil. matchedAssetsCount comes from a
+// separate COUNT query over the same condition.
+//
+// NOTE(review): highlight() is given column index 6, presumably the content
+// column, with empty markers - confirm against the table definition.
+func fullTextSearchAssetContentByFTS(query, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) {
+	table := "asset_contents_fts_case_insensitive"
+	projections := "id, name, ext, path, size, updated, " +
+		"highlight(" + table + ", 6, '', '') AS content"
+	stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + buildAssetContentColumnFilter() + ":(" + query + ")'"
+	stmt += ") AND ext IN " + typeFilter
+	stmt += " " + orderBy
+	stmt += " LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa((page-1)*pageSize)
+	blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize)
+	ret = fromSQLBlocks(&blocks, "", beforeLen)
+	if 1 > len(ret) {
+		ret = []*Block{}
+	}
+
+	matchedAssetsCount = fullTextSearchAssetContentCount(query, typeFilter)
+	return
+}
+
+// searchAssetContentBySQL executes the caller-provided SQL statement and
+// returns one page of results together with the total number of matches.
+//
+// The total is obtained by lower-casing the statement, rewriting
+// "select * " into a COUNT(path) projection, removing the LIMIT clause and
+// running the result. When the page is empty, or the count query fails or
+// yields no usable row, matchedAssetsCount stays 0. ret is never nil.
+func searchAssetContentBySQL(stmt string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) {
+	stmt = gulu.Str.RemoveInvisible(stmt)
+	stmt = strings.TrimSpace(stmt)
+	blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize)
+	ret = fromSQLBlocks(&blocks, "", beforeLen)
+	if 1 > len(ret) {
+		ret = []*Block{}
+		return
+	}
+
+	stmt = strings.ToLower(stmt)
+	stmt = strings.ReplaceAll(stmt, "select * ", "select COUNT(path) AS `assets` ")
+	stmt = removeLimitClause(stmt)
+	result, err := sql.QueryNoLimit(stmt)
+	// Check the count result itself (not ret, which is known to be non-empty
+	// here) before indexing into it.
+	if nil != err || 1 > len(result) {
+		return
+	}
+
+	// The rewritten statement may not expose an integer "assets" column, for
+	// example when the original did not start with "select * ".
+	if count, ok := result[0]["assets"].(int64); ok {
+		matchedAssetsCount = int(count)
+	}
+	return
+}
+
+// fullTextSearchAssetContentCount counts the assets that match query as a
+// full-text MATCH expression on the name and content columns and whose
+// extension is in typeFilter.
+//
+// The condition mirrors fullTextSearchAssetContentByFTS, including the filter
+// on the ext column, so that the count agrees with the paged results. It
+// yields 0 when the query fails or returns no usable row.
+func fullTextSearchAssetContentCount(query, typeFilter string) (matchedAssetsCount int) {
+	query = gulu.Str.RemoveInvisible(query)
+
+	table := "asset_contents_fts_case_insensitive"
+	stmt := "SELECT COUNT(path) AS `assets` FROM `" + table + "` WHERE (`" + table + "` MATCH '" + buildAssetContentColumnFilter() + ":(" + query + ")'"
+	stmt += ") AND ext IN " + typeFilter
+	result, err := sql.QueryNoLimit(stmt)
+	if nil != err || 1 > len(result) {
+		return
+	}
+	// Checked assertion: an unexpected column type must not panic the search.
+	if count, ok := result[0]["assets"].(int64); ok {
+		matchedAssetsCount = int(count)
+	}
+	return
+}
+
+func buildAssetContentColumnFilter() string {
+ return "{name content}"
+}
+
+func buildAssetContentTypeFilter(types map[string]bool) string {
+ if 0 == len(types) {
+ return ""
+ }
+
+ var buf bytes.Buffer
+ buf.WriteString("(")
+ for k, _ := range types {
+ buf.WriteString("'")
+ buf.WriteString(k)
+ buf.WriteString("',")
+ }
+ buf.Truncate(buf.Len() - 1)
+ buf.WriteString(")")
+ return buf.String()
+}
+
+// buildAssetContentOrderBy maps the numeric sort option to an ORDER BY clause:
+// 1 sorts by update time ascending, 2 by update time descending, and every
+// other value (including 0) by rank descending.
+func buildAssetContentOrderBy(orderBy int) string {
+	if 1 == orderBy {
+		return "ORDER BY updated ASC"
+	}
+	if 2 == orderBy {
+		return "ORDER BY updated DESC"
+	}
+	return "ORDER BY rank DESC"
+}
+
var assetContentSearcher = NewAssetsSearcher()
func IndexAssetContent(absPath string) {
@@ -63,8 +230,8 @@ func IndexAssetContent(absPath string) {
assetContents := []*sql.AssetContent{
{
ID: ast.NewNodeID(),
- Name: filepath.Base(p),
- Ext: filepath.Ext(p),
+ Name: util.RemoveID(filepath.Base(p)),
+ Ext: ext,
Path: p,
Size: info.Size(),
Updated: info.ModTime().Unix(),
@@ -148,8 +315,8 @@ func (searcher *AssetsSearcher) FullIndex() {
for _, result := range results {
assetContents = append(assetContents, &sql.AssetContent{
ID: ast.NewNodeID(),
- Name: filepath.Base(result.Path),
- Ext: filepath.Ext(result.Path),
+ Name: util.RemoveID(filepath.Base(result.Path)),
+ Ext: strings.ToLower(filepath.Ext(result.Path)),
Path: result.Path,
Size: result.Size,
Updated: result.Updated,
diff --git a/kernel/sql/asset_content_query.go b/kernel/sql/asset_content_query.go
new file mode 100644
index 000000000..995de2e4b
--- /dev/null
+++ b/kernel/sql/asset_content_query.go
@@ -0,0 +1,138 @@
+// SiYuan - Refactor your thinking
+// Copyright (c) 2020-present, b3log.org
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+package sql
+
+import (
+ "database/sql"
+ "errors"
+ "strconv"
+ "strings"
+
+ "github.com/88250/vitess-sqlparser/sqlparser"
+ "github.com/siyuan-note/logging"
+)
+
+func SelectAssetContentsRawStmt(stmt string, page, limit int) (ret []*Block) {
+ parsedStmt, err := sqlparser.Parse(stmt)
+ if nil != err {
+ return selectAssetContentsRawStmt(stmt, limit)
+ }
+
+ switch parsedStmt.(type) {
+ case *sqlparser.Select:
+ slct := parsedStmt.(*sqlparser.Select)
+ if nil == slct.Limit {
+ slct.Limit = &sqlparser.Limit{
+ Rowcount: &sqlparser.SQLVal{
+ Type: sqlparser.IntVal,
+ Val: []byte(strconv.Itoa(limit)),
+ },
+ }
+ slct.Limit.Offset = &sqlparser.SQLVal{
+ Type: sqlparser.IntVal,
+ Val: []byte(strconv.Itoa((page - 1) * limit)),
+ }
+ } else {
+ if nil != slct.Limit.Rowcount && 0 < len(slct.Limit.Rowcount.(*sqlparser.SQLVal).Val) {
+ limit, _ = strconv.Atoi(string(slct.Limit.Rowcount.(*sqlparser.SQLVal).Val))
+ if 0 >= limit {
+ limit = 32
+ }
+ }
+
+ slct.Limit.Rowcount = &sqlparser.SQLVal{
+ Type: sqlparser.IntVal,
+ Val: []byte(strconv.Itoa(limit)),
+ }
+ slct.Limit.Offset = &sqlparser.SQLVal{
+ Type: sqlparser.IntVal,
+ Val: []byte(strconv.Itoa((page - 1) * limit)),
+ }
+ }
+
+ stmt = sqlparser.String(slct)
+ default:
+ return
+ }
+
+ stmt = strings.ReplaceAll(stmt, "\\'", "''")
+ stmt = strings.ReplaceAll(stmt, "\\\"", "\"")
+ stmt = strings.ReplaceAll(stmt, "\\\\*", "\\*")
+ stmt = strings.ReplaceAll(stmt, "from dual", "")
+ rows, err := queryAssetContent(stmt)
+ if nil != err {
+ if strings.Contains(err.Error(), "syntax error") {
+ return
+ }
+ logging.LogWarnf("sql query [%s] failed: %s", stmt, err)
+ return
+ }
+ defer rows.Close()
+ for rows.Next() {
+ if block := scanAssetContentRows(rows); nil != block {
+ ret = append(ret, block)
+ }
+ }
+ return
+}
+
+func selectAssetContentsRawStmt(stmt string, limit int) (ret []*Block) {
+ rows, err := queryAssetContent(stmt)
+ if nil != err {
+ if strings.Contains(err.Error(), "syntax error") {
+ return
+ }
+ return
+ }
+ defer rows.Close()
+
+ noLimit := !strings.Contains(strings.ToLower(stmt), " limit ")
+ var count, errCount int
+ for rows.Next() {
+ count++
+ if block := scanAssetContentRows(rows); nil != block {
+ ret = append(ret, block)
+ } else {
+ logging.LogWarnf("raw sql query [%s] failed", stmt)
+ errCount++
+ }
+
+ if (noLimit && limit < count) || 0 < errCount {
+ break
+ }
+ }
+ return
+}
+
+// scanAssetContentRows scans the current row of rows into a new Block and
+// stores it in the block cache via putBlockCache.
+//
+// It returns *Block, the type it actually builds and the element type of the
+// []*Block slices its callers append to. It returns nil, after logging the
+// error with a short stack trace, when the row cannot be scanned.
+//
+// NOTE(review): the 21 scanned columns are the Block fields in declaration
+// order here; verify that this matches the rows produced by asset content
+// queries.
+func scanAssetContentRows(rows *sql.Rows) (ret *Block) {
+	var block Block
+	if err := rows.Scan(&block.ID, &block.ParentID, &block.RootID, &block.Hash, &block.Box, &block.Path, &block.HPath, &block.Name, &block.Alias, &block.Memo, &block.Tag, &block.Content, &block.FContent, &block.Markdown, &block.Length, &block.Type, &block.SubType, &block.IAL, &block.Sort, &block.Created, &block.Updated); nil != err {
+		logging.LogErrorf("query scan field failed: %s\n%s", err, logging.ShortStack())
+		return
+	}
+	ret = &block
+	putBlockCache(ret)
+	return
+}
+
+// queryAssetContent trims surrounding whitespace from query and executes it
+// with args on assetContentDB. A statement that is empty after trimming is
+// rejected with an error instead of being sent to the database.
+func queryAssetContent(query string, args ...interface{}) (*sql.Rows, error) {
+	if query = strings.TrimSpace(query); 0 == len(query) {
+		return nil, errors.New("statement is empty")
+	}
+	return assetContentDB.Query(query, args...)
+}