// SiYuan - Refactor your thinking // Copyright (c) 2020-present, b3log.org // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package util import ( "bytes" "fmt" "math/rand" "regexp" "strconv" "strings" "time" "unicode" "github.com/88250/lute/html" "github.com/siyuan-note/logging" ) func init() { rand.Seed(time.Now().UTC().UnixNano()) } func GetDuplicateName(master string) (ret string) { if "" == master { return } ret = master + " (1)" r := regexp.MustCompile("^(.*) \\((\\d+)\\)$") m := r.FindStringSubmatch(master) if nil == m || 3 > len(m) { return } num, _ := strconv.Atoi(m[2]) num++ ret = fmt.Sprintf("%s (%d)", m[1], num) return } var ( letter = []rune("abcdefghijklmnopqrstuvwxyz0123456789") ) func RandString(length int) string { b := make([]rune, length) for i := range b { b[i] = letter[rand.Intn(len(letter))] } return string(b) } // InsertElem inserts value at index into s. // 0 <= index <= len(s) func InsertElem[T any](s []T, index int, value T) []T { if len(s) == index { // nil or empty slice or after last element return append(s, value) } s = append(s[:index+1], s[index:]...) // index < len(s) s[index] = value return s } // RemoveElem removes the element at index i from s. func RemoveElem[T any](s []T, index int) []T { return append(s[:index], s[index+1:]...) } func EscapeHTML(s string) (ret string) { ret = s if "" == strings.TrimSpace(ret) { return } ret = html.EscapeString(ret) return } func UnescapeHTML(s string) (ret string) { ret = s if "" == strings.TrimSpace(ret) { return } ret = html.UnescapeString(ret) return } func HasUnclosedHtmlTag(htmlStr string) bool { // 检查未闭合注释 openIdx := 0 for { start := strings.Index(htmlStr[openIdx:], "") if end == -1 { return true // 存在未闭合注释 } openIdx = start + 4 + end + 3 } // 去除所有注释内容 commentRe := regexp.MustCompile(``) htmlStr = commentRe.ReplaceAllString(htmlStr, "") tagRe := regexp.MustCompile(`<(/?)([a-zA-Z0-9]+)[^>]*?>`) selfClosing := map[string]bool{ "br": true, "img": true, "hr": true, "input": true, "meta": true, "link": true, } stack := []string{} matches := tagRe.FindAllStringSubmatch(htmlStr, -1) for _, m := range matches { isClose := m[1] == "/" tag := strings.ToLower(m[2]) if selfClosing[tag] { continue } if !isClose { stack = append(stack, tag) } else { if len(stack) == 0 || stack[len(stack)-1] != tag { return true // 闭合标签不匹配 } stack = stack[:len(stack)-1] } } return len(stack) != 0 } func Reverse(s string) string { runes := []rune(s) for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { runes[i], runes[j] = runes[j], runes[i] } return string(runes) } func RemoveRedundantSpace(str string) string { buf := bytes.Buffer{} lastIsChinese := false lastIsSpace := false for _, r := range str { if unicode.IsSpace(r) { if lastIsChinese || lastIsSpace { continue } buf.WriteRune(' ') lastIsChinese = false lastIsSpace = true continue } lastIsSpace = false buf.WriteRune(r) if unicode.Is(unicode.Han, r) { lastIsChinese = true continue } else { lastIsChinese = false } } return buf.String() } func Convert2Float(s string) (float64, bool) { s = RemoveInvalid(s) s = strings.ReplaceAll(s, " ", "") s = strings.ReplaceAll(s, ",", "") buf := bytes.Buffer{} for _, r := range s { if unicode.IsDigit(r) || '.' == r || '-' == r { buf.WriteRune(r) } } s = buf.String() ret, err := strconv.ParseFloat(strings.TrimSpace(s), 64) if err != nil { return 0, false } return ret, true } func ContainsSubStr(s string, subStrs []string) bool { for _, v := range subStrs { if strings.Contains(s, v) { return true } } return false } func GetContainsSubStrs(s string, subStrs []string) (ret []string) { for _, v := range subStrs { if strings.Contains(s, v) { ret = append(ret, v) } } return } func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) { if old == new { return strs, false } for i, v := range strs { if v == old { strs[i] = new changed = true } } ret = strs return } func SanitizeSVG(svgInput string) string { // 1. 将字符串解析为节点树 doc, err := html.Parse(strings.NewReader(svgInput)) if err != nil { logging.LogWarnf("parse svg failed: %v", err) return svgInput } // 2. 定义递归移除逻辑 var walk func(*html.Node) walk = func(n *html.Node) { // 倒序遍历子节点,确保删除操作不影响后续迭代 for c := n.FirstChild; c != nil; { next := c.NextSibling if c.Type == html.ElementNode { tag := strings.ToLower(c.Data) if tag == "script" || tag == "iframe" || tag == "object" || tag == "embed" || tag == "foreignobject" { n.RemoveChild(c) c = next continue } // 清理不安全属性 if len(c.Attr) > 0 { // 过滤属性:删除以 on 开头的属性(事件处理),href/xlink:href 指向 javascript: 或不安全 data:,以及危险的 style 表达式 filtered := c.Attr[:0] for _, a := range c.Attr { key := strings.ToLower(a.Key) val := strings.TrimSpace(strings.ToLower(a.Val)) // 删除事件处理器属性(onload, onerror 等) if strings.HasPrefix(key, "on") { continue } // 删除 href 或 xlink:href 指向 javascript: 或某些不安全的 data: URI if key == "href" || key == "xlink:href" || key == "xlinkhref" { if strings.HasPrefix(val, "javascript:") { continue } // 对 data: 做保守处理,删除包含可执行内容的 data:text/html 或 data:image/svg+xml if strings.HasPrefix(val, "data:") { if strings.Contains(val, "text/html") || strings.Contains(val, "image/svg+xml") || strings.Contains(val, "application/xhtml+xml") { continue } } } // 清理 style 中的危险表达式,如 expression() 或 url(javascript:...) if key == "style" { low := val if strings.Contains(low, "expression(") || strings.Contains(low, "url(javascript:") || strings.Contains(low, "javascript:") { // 丢弃整个 style 属性以保证安全 continue } } // 其它属性保留 filtered = append(filtered, a) } c.Attr = filtered } } // 递归处理子节点(如果节点尚未被删除) if c.Parent != nil { walk(c) } c = next } } // 3. 执行移除 walk(doc) // 4. 将处理后的树重新渲染回字符串 var buf bytes.Buffer if err = html.Render(&buf, doc); err != nil { logging.LogWarnf("render svg failed: %v", err) return svgInput } // 5. 提取 SVG 部分 (html.Render 会自动加上 标签) return extractSVG(buf.String()) } func extractSVG(fullHTML string) string { start := strings.Index(fullHTML, "") if start == -1 || end == -1 { return fullHTML } return fullHTML[start : end+6] }