diff --git a/kernel/go.mod b/kernel/go.mod index 5f703f748..6f414f46b 100644 --- a/kernel/go.mod +++ b/kernel/go.mod @@ -58,10 +58,13 @@ require ( require ( dmitri.shuralyov.com/font/woff2 v0.0.0-20180220214647-957792cbbdab // indirect + github.com/BobuSumisu/aho-corasick v1.0.3 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/alecthomas/chroma v0.10.0 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 // indirect + github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 // indirect github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect github.com/aws/aws-sdk-go v1.44.199 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect diff --git a/kernel/go.sum b/kernel/go.sum index df5f3f4d9..3ff4e5003 100644 --- a/kernel/go.sum +++ b/kernel/go.sum @@ -16,6 +16,8 @@ github.com/88250/pdfcpu v0.3.13 h1:touMWMZkCGalMIbEg9bxYp7rETM+zwb9hXjwhqi4I7Q= github.com/88250/pdfcpu v0.3.13/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4= github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 h1:48T899JQDwyyRu9yXHePYlPdHtpJfrJEUGBMH3SMBWY= github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1/go.mod h1:U3pckKQIgxxkmZjV5yXQjHdGxQK0o/vEZeZ6cQsxfHw= +github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g= +github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732 h1:0EDePskeF4vNFCk70ATaFHQzjmwXsk+VImnMJttecNU= github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732/go.mod h1:krTLO7JWu6g8RMxG8sl+T1Hf8W93XQacBKJmqFZ2MFY= @@ -33,6 +35,10 @@ github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbf github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 h1:onfun1RA+KcxaMk1lfrRnwCd1UUuOjJM/lri5eM1qMs= +github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0/go.mod h1:4yg+jNTYlDEzBjhGS96v+zjyA3lfXlFd5CiTLIkPBLI= +github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 h1:HblK3eJHq54yET63qPCTJnks3loDse5xRmmqHgHzwoI= +github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6/go.mod h1:pbiaLIeYLUbgMY1kwEAdwO6UKD5ZNwdPGQlwokS9fe8= github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA= github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw= github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM= diff --git a/kernel/model/virutalref.go b/kernel/model/virutalref.go index 674d33c72..2caf4d75a 100644 --- a/kernel/model/virutalref.go +++ b/kernel/model/virutalref.go @@ -18,6 +18,8 @@ package model import ( "bytes" + goahocorasick "github.com/anknown/ahocorasick" + "github.com/siyuan-note/logging" "regexp" "sort" "strings" @@ -27,7 +29,6 @@ import ( "github.com/88250/lute" "github.com/88250/lute/ast" "github.com/88250/lute/parse" - "github.com/cloudflare/ahocorasick" "github.com/dgraph-io/ristretto" "github.com/siyuan-note/siyuan/kernel/search" "github.com/siyuan-note/siyuan/kernel/sql" @@ -70,22 +71,38 @@ func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) (ret []s } contentTmp := blockContent - keywordsTmp := keywords + var keywordsTmp [][]rune if !Conf.Search.CaseSensitive { contentTmp = strings.ToLower(blockContent) - for i, keyword := range keywordsTmp { - keywordsTmp[i] = strings.ToLower(keyword) + for _, keyword := range keywords { + keywordsTmp = append(keywordsTmp, []rune(strings.ToLower(keyword))) + } + } else { + for _, keyword := range keywords { + keywordsTmp = append(keywordsTmp, []rune(keyword)) } } if 1024*1024 < len(contentTmp) { - matcher := ahocorasick.NewStringMatcher(keywords) - hits := matcher.Match([]byte(contentTmp)) - for _, hit := range hits { - ret = append(ret, keywords[hit]) + m := goahocorasick.Machine{} + buildErr := m.Build(keywordsTmp) + if nil != buildErr { + logging.LogWarnf("build virtual ref keywords AC matcher failed: %s", buildErr) + for _, keywordRunes := range keywordsTmp { + keyword := string(keywordRunes) + if strings.Contains(contentTmp, keyword) { + ret = append(ret, keyword) + } + } + } else { + hits := m.MultiPatternSearch([]rune(contentTmp), false) + for _, hit := range hits { + ret = append(ret, string(hit.Word)) + } } } else { - for _, keyword := range keywordsTmp { + for _, keywordRunes := range keywordsTmp { + keyword := string(keywordRunes) if strings.Contains(contentTmp, keyword) { ret = append(ret, keyword) }