diff --git a/kernel/model/asset_content.go b/kernel/model/asset_content.go index cee8245a0..031e85713 100644 --- a/kernel/model/asset_content.go +++ b/kernel/model/asset_content.go @@ -21,6 +21,7 @@ import ( "io/fs" "os" "path/filepath" + "strconv" "strings" "sync" @@ -36,6 +37,172 @@ import ( "github.com/xuri/excelize/v2" ) +// FullTextSearchAssetContent 搜索资源文件内容。 +// +// method:0:关键字,1:查询语法,2:SQL,3:正则表达式 +// orderBy: 0:相关度(默认),1:按更新时间升序,2:按更新时间降序 +func FullTextSearchAssetContent(query string, types map[string]bool, method, orderBy, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount, pageCount int) { + query = strings.TrimSpace(query) + beforeLen := 36 + var blocks []*Block + orderByClause := buildAssetContentOrderBy(orderBy) + switch method { + case 1: // 查询语法 + filter := buildAssetContentTypeFilter(types) + blocks, matchedRootCount = fullTextSearchAssetContentByQuerySyntax(query, filter, orderByClause, beforeLen, page, pageSize) + case 2: // SQL + blocks, matchedRootCount = searchAssetContentBySQL(query, beforeLen, page, pageSize) + case 3: // 正则表达式 + typeFilter := buildAssetContentTypeFilter(types) + blocks, matchedRootCount = fullTextSearchAssetContentByRegexp(query, typeFilter, orderByClause, beforeLen, page, pageSize) + default: // 关键字 + filter := buildAssetContentTypeFilter(types) + blocks, matchedRootCount = fullTextSearchAssetContentByKeyword(query, filter, orderByClause, beforeLen, page, pageSize) + } + pageCount = (matchedRootCount + pageSize - 1) / pageSize + + if 1 > len(ret) { + ret = []*Block{} + } + return +} + +func fullTextSearchAssetContentByQuerySyntax(query, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) { + query = gulu.Str.RemoveInvisible(query) + return fullTextSearchAssetContentByFTS(query, typeFilter, orderBy, beforeLen, page, pageSize) +} + +func fullTextSearchAssetContentByKeyword(query, typeFilter string, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) { + query = gulu.Str.RemoveInvisible(query) + query = stringQuery(query) + return fullTextSearchAssetContentByFTS(query, typeFilter, orderBy, beforeLen, page, pageSize) +} + +func fullTextSearchAssetContentByRegexp(exp, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) { + exp = gulu.Str.RemoveInvisible(exp) + fieldFilter := assetContentFieldRegexp(exp) + stmt := "SELECT * FROM `asset_contents_fts_case_insensitive` WHERE " + fieldFilter + " AND ext IN " + typeFilter + stmt += " " + orderBy + stmt += " LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa((page-1)*pageSize) + blocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit) + ret = fromSQLBlocks(&blocks, "", beforeLen) + if 1 > len(ret) { + ret = []*Block{} + } + + matchedAssetsCount = fullTextSearchAssetContentCountByRegexp(exp, typeFilter) + return +} + +func assetContentFieldRegexp(exp string) string { + buf := bytes.Buffer{} + buf.WriteString("(name REGEXP '") + buf.WriteString(exp) + buf.WriteString("' OR content REGEXP '") + buf.WriteString(exp) + buf.WriteString("')") + return buf.String() +} + +func fullTextSearchAssetContentCountByRegexp(exp, typeFilter string) (matchedAssetsCount int) { + fieldFilter := fieldRegexp(exp) + stmt := "SELECT COUNT(path) AS `assets` FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter + result, _ := sql.QueryNoLimit(stmt) + if 1 > len(result) { + return + } + matchedAssetsCount = int(result[0]["assets"].(int64)) + return +} + +func fullTextSearchAssetContentByFTS(query, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) { + table := "asset_contents_fts_case_insensitive" + projections := "id, name, ext, path, size, updated, " + + "highlight(" + table + ", 6, '', '') AS content" + stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + buildAssetContentColumnFilter() + ":(" + query + ")'" + stmt += ") AND type IN " + typeFilter + stmt += " " + orderBy + stmt += " LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa((page-1)*pageSize) + blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize) + ret = fromSQLBlocks(&blocks, "", beforeLen) + if 1 > len(ret) { + ret = []*Block{} + } + + matchedAssetsCount = fullTextSearchAssetContentCount(query, typeFilter) + return +} + +func searchAssetContentBySQL(stmt string, beforeLen, page, pageSize int) (ret []*Block, matchedAssetsCount int) { + stmt = gulu.Str.RemoveInvisible(stmt) + stmt = strings.TrimSpace(stmt) + blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize) + ret = fromSQLBlocks(&blocks, "", beforeLen) + if 1 > len(ret) { + ret = []*Block{} + return + } + + stmt = strings.ToLower(stmt) + stmt = strings.ReplaceAll(stmt, "select * ", "select COUNT(path) AS `assets` ") + stmt = removeLimitClause(stmt) + result, _ := sql.QueryNoLimit(stmt) + if 1 > len(ret) { + return + } + + matchedAssetsCount = int(result[0]["assets"].(int64)) + return +} + +func fullTextSearchAssetContentCount(query, typeFilter string) (matchedAssetsCount int) { + query = gulu.Str.RemoveInvisible(query) + + table := "asset_contents_fts_case_insensitive" + stmt := "SELECT COUNT(path) AS `assets` FROM `" + table + "` WHERE (`" + table + "` MATCH '" + buildAssetContentColumnFilter() + ":(" + query + ")'" + stmt += ") AND type IN " + typeFilter + result, _ := sql.QueryNoLimit(stmt) + if 1 > len(result) { + return + } + matchedAssetsCount = int(result[0]["assets"].(int64)) + return +} + +func buildAssetContentColumnFilter() string { + return "{name content}" +} + +func buildAssetContentTypeFilter(types map[string]bool) string { + if 0 == len(types) { + return "" + } + + var buf bytes.Buffer + buf.WriteString("(") + for k, _ := range types { + buf.WriteString("'") + buf.WriteString(k) + buf.WriteString("',") + } + buf.Truncate(buf.Len() - 1) + buf.WriteString(")") + return buf.String() +} + +func buildAssetContentOrderBy(orderBy int) string { + switch orderBy { + case 0: + return "ORDER BY rank DESC" + case 1: + return "ORDER BY updated ASC" + case 2: + return "ORDER BY updated DESC" + default: + return "ORDER BY rank DESC" + } +} + var assetContentSearcher = NewAssetsSearcher() func IndexAssetContent(absPath string) { @@ -63,8 +230,8 @@ func IndexAssetContent(absPath string) { assetContents := []*sql.AssetContent{ { ID: ast.NewNodeID(), - Name: filepath.Base(p), - Ext: filepath.Ext(p), + Name: util.RemoveID(filepath.Base(p)), + Ext: ext, Path: p, Size: info.Size(), Updated: info.ModTime().Unix(), @@ -148,8 +315,8 @@ func (searcher *AssetsSearcher) FullIndex() { for _, result := range results { assetContents = append(assetContents, &sql.AssetContent{ ID: ast.NewNodeID(), - Name: filepath.Base(result.Path), - Ext: filepath.Ext(result.Path), + Name: util.RemoveID(filepath.Base(result.Path)), + Ext: strings.ToLower(filepath.Ext(result.Path)), Path: result.Path, Size: result.Size, Updated: result.Updated, diff --git a/kernel/sql/asset_content_query.go b/kernel/sql/asset_content_query.go new file mode 100644 index 000000000..995de2e4b --- /dev/null +++ b/kernel/sql/asset_content_query.go @@ -0,0 +1,138 @@ +// SiYuan - Refactor your thinking +// Copyright (c) 2020-present, b3log.org +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package sql + +import ( + "database/sql" + "errors" + "strconv" + "strings" + + "github.com/88250/vitess-sqlparser/sqlparser" + "github.com/siyuan-note/logging" +) + +func SelectAssetContentsRawStmt(stmt string, page, limit int) (ret []*Block) { + parsedStmt, err := sqlparser.Parse(stmt) + if nil != err { + return selectAssetContentsRawStmt(stmt, limit) + } + + switch parsedStmt.(type) { + case *sqlparser.Select: + slct := parsedStmt.(*sqlparser.Select) + if nil == slct.Limit { + slct.Limit = &sqlparser.Limit{ + Rowcount: &sqlparser.SQLVal{ + Type: sqlparser.IntVal, + Val: []byte(strconv.Itoa(limit)), + }, + } + slct.Limit.Offset = &sqlparser.SQLVal{ + Type: sqlparser.IntVal, + Val: []byte(strconv.Itoa((page - 1) * limit)), + } + } else { + if nil != slct.Limit.Rowcount && 0 < len(slct.Limit.Rowcount.(*sqlparser.SQLVal).Val) { + limit, _ = strconv.Atoi(string(slct.Limit.Rowcount.(*sqlparser.SQLVal).Val)) + if 0 >= limit { + limit = 32 + } + } + + slct.Limit.Rowcount = &sqlparser.SQLVal{ + Type: sqlparser.IntVal, + Val: []byte(strconv.Itoa(limit)), + } + slct.Limit.Offset = &sqlparser.SQLVal{ + Type: sqlparser.IntVal, + Val: []byte(strconv.Itoa((page - 1) * limit)), + } + } + + stmt = sqlparser.String(slct) + default: + return + } + + stmt = strings.ReplaceAll(stmt, "\\'", "''") + stmt = strings.ReplaceAll(stmt, "\\\"", "\"") + stmt = strings.ReplaceAll(stmt, "\\\\*", "\\*") + stmt = strings.ReplaceAll(stmt, "from dual", "") + rows, err := queryAssetContent(stmt) + if nil != err { + if strings.Contains(err.Error(), "syntax error") { + return + } + logging.LogWarnf("sql query [%s] failed: %s", stmt, err) + return + } + defer rows.Close() + for rows.Next() { + if block := scanAssetContentRows(rows); nil != block { + ret = append(ret, block) + } + } + return +} + +func selectAssetContentsRawStmt(stmt string, limit int) (ret []*Block) { + rows, err := queryAssetContent(stmt) + if nil != err { + if strings.Contains(err.Error(), "syntax error") { + return + } + return + } + defer rows.Close() + + noLimit := !strings.Contains(strings.ToLower(stmt), " limit ") + var count, errCount int + for rows.Next() { + count++ + if block := scanAssetContentRows(rows); nil != block { + ret = append(ret, block) + } else { + logging.LogWarnf("raw sql query [%s] failed", stmt) + errCount++ + } + + if (noLimit && limit < count) || 0 < errCount { + break + } + } + return +} + +func scanAssetContentRows(rows *sql.Rows) (ret *AssetContent) { + var block Block + if err := rows.Scan(&block.ID, &block.ParentID, &block.RootID, &block.Hash, &block.Box, &block.Path, &block.HPath, &block.Name, &block.Alias, &block.Memo, &block.Tag, &block.Content, &block.FContent, &block.Markdown, &block.Length, &block.Type, &block.SubType, &block.IAL, &block.Sort, &block.Created, &block.Updated); nil != err { + logging.LogErrorf("query scan field failed: %s\n%s", err, logging.ShortStack()) + return + } + ret = &block + putBlockCache(ret) + return +} + +func queryAssetContent(query string, args ...interface{}) (*sql.Rows, error) { + query = strings.TrimSpace(query) + if "" == query { + return nil, errors.New("statement is empty") + } + return assetContentDB.Query(query, args...) +}