Feature/trie ac automation (#1479)

* fix: trie ac automation issues

* fix: trie ac automation issues

* fix: trie ac automation issues

* fix: trie ac automation issues
master v1.3.0-beta
Kevin Wan 3 years ago committed by GitHub
parent cdf7ec213c
commit b3e7d2901f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -36,31 +36,29 @@ func (n *node) add(word string) {
} }
func (n *node) build() { func (n *node) build() {
n.fail = n var nodes []*node
for _, child := range n.children { for _, child := range n.children {
child.fail = n child.fail = n
n.buildNode(child) nodes = append(nodes, child)
} }
} for len(nodes) > 0 {
nd := nodes[0]
func (n *node) buildNode(nd *node) { nodes = nodes[1:]
if nd.children == nil {
return
}
var fifo []*node
for key, child := range nd.children { for key, child := range nd.children {
fifo = append(fifo, child) nodes = append(nodes, child)
cur := nd
if fail, ok := nd.fail.children[key]; ok { for cur != nil {
child.fail = fail if cur.fail == nil {
} else {
child.fail = n child.fail = n
break
}
if fail, ok := cur.fail.children[key]; ok {
child.fail = fail
break
}
cur = cur.fail
} }
} }
for _, val := range fifo {
n.buildNode(val)
} }
} }
@ -73,27 +71,28 @@ func (n *node) find(chars []rune) []scope {
child, ok := cur.children[chars[i]] child, ok := cur.children[chars[i]]
if ok { if ok {
cur = child cur = child
} else if cur == n {
continue
} else { } else {
for cur != n {
cur = cur.fail cur = cur.fail
if child, ok = cur.children[chars[i]]; !ok { if child, ok = cur.children[chars[i]]; ok {
cur = child
break
}
}
if child == nil {
continue continue
} }
cur = child
} }
for child != n {
if child.end { if child.end {
scopes = append(scopes, scope{ scopes = append(scopes, scope{
start: i + 1 - child.depth, start: i + 1 - child.depth,
stop: i + 1, stop: i + 1,
}) })
} }
if child.fail != n && child.fail.end { child = child.fail
scopes = append(scopes, scope{
start: i + 1 - child.fail.depth,
stop: i + 1,
})
} }
} }

@ -0,0 +1,87 @@
//go:build go1.18
// +build go1.18
package stringx
import (
"fmt"
"math/rand"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// FuzzNodeFind cross-checks node.find against a brute-force search.
//
// For each fuzz input it generates a random text, a set of random keywords
// that do not occur in the text (at most maxKeys of them, driven by the
// fuzzed keys value), and a set of keywords sliced out of the text itself.
// The expected scopes are computed with strings.Index for every (possibly
// overlapping) occurrence of the sliced keywords, and must match what the
// trie reports, ignoring order.
func FuzzNodeFind(f *testing.F) {
	// maxKeys bounds the fuzzer-controlled loop below; without a cap a large
	// mutated value would keep a single fuzz iteration busy indefinitely.
	const maxKeys = 1000

	rand.Seed(time.Now().UnixNano())

	f.Add(10)
	f.Fuzz(func(t *testing.T, keys int) {
		if keys > maxKeys {
			keys = maxKeys
		}

		str := Randn(rand.Intn(100) + 50)

		// Noise keywords: random strings that are known not to occur in str,
		// so they must never produce a scope.
		keywords := make(map[string]struct{})
		for i := 0; i < keys; i++ {
			keyword := Randn(rand.Intn(10) + 5)
			if !strings.Contains(str, keyword) {
				keywords[keyword] = struct{}{}
			}
		}

		size := len(str)
		var scopes []scope
		var n node
		for i := 0; i < size%20; i++ {
			start := rand.Intn(size)
			stop := start + rand.Intn(20) + 1
			if stop > size {
				stop = size
			}
			if start == stop {
				continue
			}

			// Matching keywords: substrings of str. Skip duplicates so each
			// keyword contributes its expected scopes exactly once.
			keyword := str[start:stop]
			if _, ok := keywords[keyword]; ok {
				continue
			}

			keywords[keyword] = struct{}{}

			// Record every occurrence, advancing by one position after each
			// hit so overlapping occurrences are found as well.
			var pos int
			for pos <= len(str)-len(keyword) {
				val := str[pos:]
				p := strings.Index(val, keyword)
				if p < 0 {
					break
				}

				scopes = append(scopes, scope{
					start: pos + p,
					stop:  pos + p + len(keyword),
				})
				pos += p + 1
			}
		}

		for keyword := range keywords {
			n.add(keyword)
		}
		n.build()

		// Render the whole scenario so a failure can be reproduced as a
		// fixed test case.
		var buf strings.Builder
		buf.WriteString("keywords:\n")
		for key := range keywords {
			fmt.Fprintf(&buf, "\t%q,\n", key)
		}
		buf.WriteString("scopes:\n")
		for _, scp := range scopes {
			fmt.Fprintf(&buf, "\t{%d, %d},\n", scp.start, scp.stop)
		}
		fmt.Fprintf(&buf, "text:\n\t%s\n", str)

		defer func() {
			if r := recover(); r != nil {
				// Report the panic value together with the scenario; the
				// scenario is passed as an argument, never as the format.
				t.Errorf("panic: %v\n%s", r, buf.String())
			}
		}()

		assert.ElementsMatchf(t, scopes, n.find([]rune(str)), "%s", buf.String())
	})
}

@ -1,6 +1,176 @@
package stringx package stringx
import "testing" import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestFuzzNodeCase1 is a fixed-data check of node.find: it loads a keyword
// list into a trie, builds it, and requires the scopes found in the text to
// equal the expected set (order-insensitive).
// NOTE(review): data presumably captured from a fuzz run — confirm.
func TestFuzzNodeCase1(t *testing.T) {
	const text = "Z05Y02iFD2bCKFazHtrx2uJs1WToEanlKVWKieaSWeKgmnUXV0ZjOKbSo0clAYTtkRr4k9hI"

	words := []string{
		"cs8Zh",
		"G1OihlVuBz",
		"K6azS2FBHjI",
		"DQKvghI4",
		"l7bA86Sze",
		"tjBLZhCao",
		"nEsXmVzP",
		"cbRh8UE1nO3s",
		"Wta3R2WcbGP",
		"jpOIcA",
		"TtkRr4k9hI",
		"OKbSo0clAYTtk",
		"uJs1WToEanlKV",
		"05Y02iFD2",
		"x2uJs1WToEanlK",
		"ieaSWe",
		"Kg",
		"FD2bCKFazH",
	}
	expected := []scope{
		{start: 62, stop: 72},
		{start: 52, stop: 65},
		{start: 21, stop: 34},
		{start: 1, stop: 10},
		{start: 19, stop: 33},
		{start: 36, stop: 42},
		{start: 42, stop: 44},
		{start: 7, stop: 17},
	}

	var trie node
	for i := range words {
		trie.add(words[i])
	}
	trie.build()

	actual := trie.find([]rune(text))
	assert.ElementsMatch(t, expected, actual)
}
// TestFuzzNodeCase2 is a fixed-data check of node.find with a larger keyword
// list, several of which overlap inside the text. The scopes found must equal
// the expected set (order-insensitive).
// NOTE(review): data presumably captured from a fuzz run — confirm.
func TestFuzzNodeCase2(t *testing.T) {
	const text = "CMICHDCSvSrgiACXVkNF9lw5awC5HeoCq0T2MCpLFgLg0gh2TT5WEyINrMwOvyHbaxdaqpZpUbI7SpIY5yVWf33MuX7K1c"

	words := []string{
		"IP1NPsJKIvt",
		"Iw7hQARwSTw",
		"SmZIcA",
		"OyxHPYkoQzFO",
		"3suCnuSAS5d",
		"HUMpbi",
		"HPdvbGGpY",
		"49qjMtR8bEt",
		"a0zrrGKVTJ2",
		"WbOBcszeo1FfZ",
		"8tHUi5PJI",
		"Oa2Di",
		"6ZWa5nr1tU",
		"o0LJRfmeXB9bF9",
		"veF0ehKxH",
		"Qp73r",
		"B6Rmds4ELY8",
		"uNpGybQZG",
		"Ogm3JqicRZlA4n",
		"FL6LVErKomc84H",
		"qv2Pi0xJj3cR1",
		"bPWLBg4",
		"hYN8Q4M1sw",
		"ExkTgNklmlIx",
		"eVgHHDOxOUEj",
		"5WPEVv0tR",
		"CPjnOAqUZgV",
		"oR3Ogtz",
		"jwk1Zbg",
		"DYqguyk8h",
		"rieroDmpvYFK",
		"MQ9hZnMjDqrNQe",
		"EhM4KqkCBd",
		"m9xalj6q",
		"d5CTL5mzK",
		"XJOoTvFtI8U",
		"iFAwspJ",
		"iGv8ErnRZIuSWX",
		"i8C1BqsYX",
		"vXN1KOaOgU",
		"GHJFB",
		"Y6OlAqbZxYG8",
		"dzd4QscSih4u",
		"SsLYMkKvB9APx",
		"gi0huB3",
		"CMICHDCSvSrgiACXVkN",
		"MwOvyHbaxdaqpZpU",
		"wOvyHbaxdaqpZpUbI",
		"2TT5WEy",
		"eoCq0T2MC",
		"ZpUbI7",
		"oCq0T2MCp",
		"CpLFgLg0g",
		"FgLg0gh",
		"w5awC5HeoCq",
		"1c",
	}
	expected := []scope{
		{start: 0, stop: 19},
		{start: 57, stop: 73},
		{start: 58, stop: 75},
		{start: 47, stop: 54},
		{start: 29, stop: 38},
		{start: 70, stop: 76},
		{start: 30, stop: 39},
		{start: 37, stop: 46},
		{start: 40, stop: 47},
		{start: 22, stop: 33},
		{start: 92, stop: 94},
	}

	var trie node
	for i := range words {
		trie.add(words[i])
	}
	trie.build()

	actual := trie.find([]rune(text))
	assert.ElementsMatch(t, expected, actual)
}
// TestFuzzNodeCase3 is a fixed-data check of node.find on a short text where
// many keywords are suffixes of one another, so several scopes end at the
// same position. The scopes found must equal the expected set
// (order-insensitive).
// NOTE(review): data presumably captured from a fuzz run — confirm.
func TestFuzzNodeCase3(t *testing.T) {
	const text = "WpKjIyIUckuK6Uh"

	words := []string{
		"QAraACKOftI4",
		"unRmd2EO0",
		"s25OtuoU",
		"aGlmn7KnbE4HCX",
		"kuK6Uh",
		"ckuK6Uh",
		"uK6Uh",
		"Iy",
		"h",
		"PMSSUNvyi",
		"ahz0i",
		"Lhs4XZ1e",
		"shPp1Va7aQNVme",
		"yIUckuK6Uh",
		"pKjIyI",
		"jIyIUckuK6Uh",
		"UckuK6Uh",
		"Uh",
		"JPAULjQgHJ",
		"Wp",
		"sbkZxXurrI",
		"pKjIyIUckuK6Uh",
	}
	expected := []scope{
		{start: 9, stop: 15},
		{start: 8, stop: 15},
		{start: 5, stop: 15},
		{start: 1, stop: 7},
		{start: 10, stop: 15},
		{start: 3, stop: 15},
		{start: 0, stop: 2},
		{start: 1, stop: 15},
		{start: 7, stop: 15},
		{start: 13, stop: 15},
		{start: 4, stop: 6},
		{start: 14, stop: 15},
	}

	var trie node
	for i := range words {
		trie.add(words[i])
	}
	trie.build()

	actual := trie.find([]rune(text))
	assert.ElementsMatch(t, expected, actual)
}
func BenchmarkNodeFind(b *testing.B) { func BenchmarkNodeFind(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()

@ -83,7 +83,7 @@ func TestReplacer_ReplaceEmpty(t *testing.T) {
assert.Equal(t, "", NewReplacer(mapping).Replace("")) assert.Equal(t, "", NewReplacer(mapping).Replace(""))
} }
func TestFuzzCase1(t *testing.T) { func TestFuzzReplacerCase1(t *testing.T) {
keywords := map[string]string{ keywords := map[string]string{
"yQyJykiqoh": "xw", "yQyJykiqoh": "xw",
"tgN70z": "Q2P", "tgN70z": "Q2P",
@ -115,7 +115,7 @@ func TestFuzzCase1(t *testing.T) {
} }
} }
func TestFuzzCase2(t *testing.T) { func TestFuzzReplacerCase2(t *testing.T) {
keywords := map[string]string{ keywords := map[string]string{
"dmv2SGZvq9Yz": "TE", "dmv2SGZvq9Yz": "TE",
"rCL5DRI9uFP8": "hvsc8", "rCL5DRI9uFP8": "hvsc8",

Loading…
Cancel
Save