使用 AC 算法优化虚拟引用匹配性能

This commit is contained in:
Liang Ding 2023-02-17 14:43:03 +08:00
parent 05c2696e0a
commit eabd1b67ed
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
3 changed files with 35 additions and 9 deletions

View file

@ -58,10 +58,13 @@ require (
require ( require (
dmitri.shuralyov.com/font/woff2 v0.0.0-20180220214647-957792cbbdab // indirect dmitri.shuralyov.com/font/woff2 v0.0.0-20180220214647-957792cbbdab // indirect
github.com/BobuSumisu/aho-corasick v1.0.3 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect github.com/alecthomas/chroma v0.10.0 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 // indirect
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 // indirect
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect
github.com/aws/aws-sdk-go v1.44.199 // indirect github.com/aws/aws-sdk-go v1.44.199 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect

View file

@ -16,6 +16,8 @@ github.com/88250/pdfcpu v0.3.13 h1:touMWMZkCGalMIbEg9bxYp7rETM+zwb9hXjwhqi4I7Q=
github.com/88250/pdfcpu v0.3.13/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4= github.com/88250/pdfcpu v0.3.13/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4=
github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 h1:48T899JQDwyyRu9yXHePYlPdHtpJfrJEUGBMH3SMBWY= github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 h1:48T899JQDwyyRu9yXHePYlPdHtpJfrJEUGBMH3SMBWY=
github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1/go.mod h1:U3pckKQIgxxkmZjV5yXQjHdGxQK0o/vEZeZ6cQsxfHw= github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1/go.mod h1:U3pckKQIgxxkmZjV5yXQjHdGxQK0o/vEZeZ6cQsxfHw=
github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g=
github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732 h1:0EDePskeF4vNFCk70ATaFHQzjmwXsk+VImnMJttecNU= github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732 h1:0EDePskeF4vNFCk70ATaFHQzjmwXsk+VImnMJttecNU=
github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732/go.mod h1:krTLO7JWu6g8RMxG8sl+T1Hf8W93XQacBKJmqFZ2MFY= github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732/go.mod h1:krTLO7JWu6g8RMxG8sl+T1Hf8W93XQacBKJmqFZ2MFY=
@ -33,6 +35,10 @@ github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbf
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 h1:onfun1RA+KcxaMk1lfrRnwCd1UUuOjJM/lri5eM1qMs=
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0/go.mod h1:4yg+jNTYlDEzBjhGS96v+zjyA3lfXlFd5CiTLIkPBLI=
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 h1:HblK3eJHq54yET63qPCTJnks3loDse5xRmmqHgHzwoI=
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6/go.mod h1:pbiaLIeYLUbgMY1kwEAdwO6UKD5ZNwdPGQlwokS9fe8=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA= github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw= github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM= github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM=

View file

@ -18,6 +18,8 @@ package model
import ( import (
"bytes" "bytes"
goahocorasick "github.com/anknown/ahocorasick"
"github.com/siyuan-note/logging"
"regexp" "regexp"
"sort" "sort"
"strings" "strings"
@ -27,7 +29,6 @@ import (
"github.com/88250/lute" "github.com/88250/lute"
"github.com/88250/lute/ast" "github.com/88250/lute/ast"
"github.com/88250/lute/parse" "github.com/88250/lute/parse"
"github.com/cloudflare/ahocorasick"
"github.com/dgraph-io/ristretto" "github.com/dgraph-io/ristretto"
"github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/search"
"github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/sql"
@ -70,22 +71,38 @@ func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) (ret []s
} }
contentTmp := blockContent contentTmp := blockContent
keywordsTmp := keywords var keywordsTmp [][]rune
if !Conf.Search.CaseSensitive { if !Conf.Search.CaseSensitive {
contentTmp = strings.ToLower(blockContent) contentTmp = strings.ToLower(blockContent)
for i, keyword := range keywordsTmp { for _, keyword := range keywords {
keywordsTmp[i] = strings.ToLower(keyword) keywordsTmp = append(keywordsTmp, []rune(strings.ToLower(keyword)))
}
} else {
for _, keyword := range keywords {
keywordsTmp = append(keywordsTmp, []rune(keyword))
} }
} }
if 1024*1024 < len(contentTmp) { if 1024*1024 < len(contentTmp) {
matcher := ahocorasick.NewStringMatcher(keywords) m := goahocorasick.Machine{}
hits := matcher.Match([]byte(contentTmp)) buildErr := m.Build(keywordsTmp)
for _, hit := range hits { if nil != buildErr {
ret = append(ret, keywords[hit]) logging.LogWarnf("build virtual ref keywords AC matcher failed: %s", buildErr)
for _, keywordRunes := range keywordsTmp {
keyword := string(keywordRunes)
if strings.Contains(contentTmp, keyword) {
ret = append(ret, keyword)
}
} }
} else { } else {
for _, keyword := range keywordsTmp { hits := m.MultiPatternSearch([]rune(contentTmp), false)
for _, hit := range hits {
ret = append(ret, string(hit.Word))
}
}
} else {
for _, keywordRunes := range keywordsTmp {
keyword := string(keywordRunes)
if strings.Contains(contentTmp, keyword) { if strings.Contains(contentTmp, keyword) {
ret = append(ret, keyword) ret = append(ret, keyword)
} }