♻️ 一些字符串操作函数移到 Gulu 项目里

This commit is contained in:
Liang Ding 2022-06-11 11:32:00 +08:00
parent bb2fafccb1
commit c03fba9ff7
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
8 changed files with 15 additions and 111 deletions

View file

@@ -15,99 +15,3 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package util
import (
"strings"
"unicode"
)
func RemoveInvisible(str string) string {
str = strings.ReplaceAll(str, "\u00A0", " ") // NBSP 转换为普通空格
str = RemoveZeroWidthCharacters(str)
str = stripCtlFromUTF8(str)
return str
}
// stripCtlFromUTF8 returns str with every rune below 32 (which includes tab,
// carriage return and line feed) and rune 127 removed. Other runes are kept.
func stripCtlFromUTF8(str string) string {
	dropControl := func(r rune) rune {
		if r < 32 || r == 127 {
			// A negative result tells strings.Map to drop the rune.
			return -1
		}
		return r
	}
	return strings.Map(dropControl, str)
}
// Zero-width code points handled by the helpers in this file, plus the
// replacement text used to delete them.
const (
// ZWSP represents zero-width space (U+200B).
ZWSP = '\u200B'
// ZWNBSP represents zero-width no-break space (U+FEFF).
ZWNBSP = '\uFEFF'
// ZWJ represents zero-width joiner (U+200D).
ZWJ = '\u200D'
// ZWNJ represents zero-width non-joiner (U+200C).
ZWNJ = '\u200C'
// empty is the replacement text used when removing zero-width characters.
empty = ""
)
// replacer maps each of ZWSP, ZWNBSP, ZWJ and ZWNJ to the empty string. It is
// built once as a package-level value and used by RemoveZeroWidthCharacters.
var replacer = strings.NewReplacer(string(ZWSP), empty,
string(ZWNBSP), empty,
string(ZWJ), empty,
string(ZWNJ), empty)
// HasZeroWidthCharacters reports whether string s contains zero-width characters.
func HasZeroWidthCharacters(s string) bool {
return strings.ContainsRune(s, ZWSP) ||
strings.ContainsRune(s, ZWNBSP) ||
strings.ContainsRune(s, ZWJ) ||
strings.ContainsRune(s, ZWNJ)
}
// RemoveZeroWidthCharacters returns a copy of s from which every ZWSP, ZWNBSP,
// ZWJ and ZWNJ has been deleted, using the package-level replacer.
func RemoveZeroWidthCharacters(s string) string {
	cleaned := replacer.Replace(s)
	return cleaned
}
// RemoveZeroWidthSpace returns a copy of s with every zero-width space (ZWSP)
// deleted.
func RemoveZeroWidthSpace(s string) string {
	target := string(ZWSP)
	return strings.ReplaceAll(s, target, empty)
}
// RemoveZeroWidthNoBreakSpace returns a copy of s with every zero-width
// no-break space (ZWNBSP) deleted.
func RemoveZeroWidthNoBreakSpace(s string) string {
	target := string(ZWNBSP)
	return strings.ReplaceAll(s, target, empty)
}
// RemoveZeroWidthJoiner returns a copy of s with every zero-width joiner (ZWJ)
// deleted.
func RemoveZeroWidthJoiner(s string) string {
	target := string(ZWJ)
	return strings.ReplaceAll(s, target, empty)
}
// RemoveZeroWidthNonJoiner returns a copy of s with every zero-width
// non-joiner (ZWNJ) deleted.
func RemoveZeroWidthNonJoiner(s string) string {
	target := string(ZWNJ)
	return strings.ReplaceAll(s, target, empty)
}
// IsASCII reports whether every byte of s is an ASCII byte, i.e. none is
// greater than unicode.MaxASCII. The empty string is reported as ASCII.
func IsASCII(s string) bool {
	for _, b := range []byte(s) {
		if b > unicode.MaxASCII {
			return false
		}
	}
	return true
}
// SubstringsBetween returns, in order of appearance, the text found between
// each occurrence of start and the first occurrence of end that follows it.
//
// str is split on start; for every segment that follows a start delimiter and
// contains end, the part of the segment before the first end is collected.
// Text preceding the first start delimiter is never reported, even if it
// contains end. Segments without an end delimiter are skipped.
//
// A nil slice is returned when nothing matches or when start or end is empty,
// since an empty delimiter cannot bound a substring.
func SubstringsBetween(str, start, end string) (ret []string) {
	if start == "" || end == "" {
		return nil
	}

	// With a non-empty separator Split always yields at least one element.
	// The first element is the text before the first start delimiter, so it
	// is not "between" anything and is skipped.
	segments := strings.Split(str, start)
	for _, seg := range segments[1:] {
		if i := strings.Index(seg, end); i >= 0 {
			ret = append(ret, seg[:i])
		}
	}
	return ret
}