diff --git a/kernel/search/find.go b/kernel/search/find.go index 41ae54740..104889e79 100644 --- a/kernel/search/find.go +++ b/kernel/search/find.go @@ -17,8 +17,8 @@ package search import ( - "bufio" "bytes" + "io" "os" "path/filepath" "runtime" @@ -47,13 +47,20 @@ func FindAllMatches(root string, targets []string) []Match { return nil } - var searchBytes [][]byte + // 构建基于首字节的模式索引,并计算最长模式长度 + patternIndex := make(map[byte][][]byte) + var maxLen int for _, t := range targets { - if t != "" { - searchBytes = append(searchBytes, []byte(t)) + if t == "" { + continue } + b := []byte(t) + if len(b) > maxLen { + maxLen = len(b) + } + patternIndex[b[0]] = append(patternIndex[b[0]], b) } - if len(searchBytes) == 0 { + if len(patternIndex) == 0 { return nil } @@ -78,7 +85,7 @@ func FindAllMatches(root string, targets []string) []Match { go func() { defer wg.Done() for p := range jobs { - hits := scanFileForTargets(p, searchBytes) + hits := scanFileForTargets(p, patternIndex, maxLen) if len(hits) > 0 { for _, t := range hits { results <- Match{Path: p, Target: t} @@ -102,47 +109,73 @@ func FindAllMatches(root string, targets []string) []Match { return matches } -// scanFileForTargets 在文件中搜索所有目标,返回去重后的命中目标字符串列表 -func scanFileForTargets(path string, targets [][]byte) []string { +// scanFileForTargets 在文件中流式搜索所有目标(基于首字节索引),返回去重后的命中目标字符串列表 +func scanFileForTargets(path string, patternIndex map[byte][][]byte, maxLen int) []string { f, err := os.Open(path) if err != nil { return nil } defer f.Close() - reader := bufio.NewReaderSize(f, 1024*1024) // 1MB 缓冲 + // 构建字节位图,加速首字节检测 + var bitmap [256]bool + for b := range patternIndex { + bitmap[b] = true + } + found := make(map[string]struct{}) - remaining := len(targets) + buf := make([]byte, 64<<10) // 64KB + // 保留上一次块末尾的重叠数据以支持跨块匹配 + var tail []byte for { - line, err := reader.ReadSlice('\n') - if len(line) > 0 { - for _, t := range targets { - ts := string(t) - if _, ok := found[ts]; ok { - continue + n, err := f.Read(buf) + if n > 0 { + // data = tail + buf[:n] + data := make([]byte, len(tail)+n) + copy(data, tail) + copy(data[len(tail):], buf[:n]) + + // 扫描 data,查找任意候选首字节位置 + i := 0 + for i < len(data) { + // 快速跳过非候选字节 + for i < len(data) && !bitmap[data[i]] { + i++ } - if bytes.Contains(line, t) { - found[ts] = struct{}{} - remaining-- - if remaining == 0 { - // 找到所有目标,提前返回 - res := make([]string, 0, len(found)) - for k := range found { - res = append(res, k) + if i >= len(data) { + break + } + b := data[i] + // 对应首字节的所有模式进行校验 + for _, pat := range patternIndex[b] { + pl := len(pat) + // 如果剩余字节不足以完全匹配,则交由下一轮(通过 tail 保证) + if i+pl <= len(data) { + if bytes.Equal(pat, data[i:i+pl]) { + found[string(pat)] = struct{}{} } - return res } } + i++ + } + + // 保留最后 maxLen-1 字节作为下一块的 tail(避免超长内存分配) + if maxLen <= 1 { + tail = nil + } else { + if len(data) >= maxLen-1 { + tail = append(tail[:0], data[len(data)-(maxLen-1):]...) + } else { + tail = append(tail[:0], data...) + } } } if err != nil { - if err == bufio.ErrBufferFull { - for err == bufio.ErrBufferFull { - _, err = reader.ReadSlice('\n') - } - continue + if err == io.EOF { + break } + // 读取出错,返回已有结果 break } }