chore: optimize string search with Aho–Corasick algorithm (#1476)
* chore: optimize string search with Aho–Corasick algorithm
* chore: optimize keywords replacer
* fix: replacer bugs
* chore: reorder members

master
parent
09d1fad6e0
commit
f1102fb262
@ -0,0 +1,25 @@
|
||||
package stringx
|
||||
|
||||
import "testing"
|
||||
|
||||
func BenchmarkNodeFind(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
keywords := []string{
|
||||
"A",
|
||||
"AV",
|
||||
"AV演员",
|
||||
"无名氏",
|
||||
"AV演员色情",
|
||||
"日本AV女优",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
trie.find([]rune("日本AV演员兼电视、电影演员。无名氏AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演"))
|
||||
}
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
//go:build go1.18
|
||||
// +build go1.18
|
||||
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func FuzzReplacerReplace(f *testing.F) {
|
||||
keywords := make(map[string]string)
|
||||
for i := 0; i < 20; i++ {
|
||||
keywords[Randn(rand.Intn(10)+5)] = Randn(rand.Intn(5) + 1)
|
||||
}
|
||||
rep := NewReplacer(keywords)
|
||||
printableKeywords := func() string {
|
||||
var buf strings.Builder
|
||||
for k, v := range keywords {
|
||||
fmt.Fprintf(&buf, "%q: %q,\n", k, v)
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
f.Add(50)
|
||||
f.Fuzz(func(t *testing.T, n int) {
|
||||
text := Randn(rand.Intn(n%50+50) + 1)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Errorf("mapping: %s\ntext: %s", printableKeywords(), text)
|
||||
}
|
||||
}()
|
||||
val := rep.Replace(text)
|
||||
keys := rep.(*replacer).node.find([]rune(val))
|
||||
if len(keys) > 0 {
|
||||
t.Errorf("mapping: %s\ntext: %s\nresult: %s\nmatch: %v",
|
||||
printableKeywords(), text, val, keys)
|
||||
}
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue