From b3e7d2901fdcf181035128e023b3a96ec277882b Mon Sep 17 00:00:00 2001 From: Kevin Wan Date: Tue, 25 Jan 2022 11:14:56 +0800 Subject: [PATCH] Feature/trie ac automation (#1479) * fix: trie ac automation issues * fix: trie ac automation issues * fix: trie ac automation issues * fix: trie ac automation issues --- core/stringx/node.go | 73 +++++++------- core/stringx/node_fuzz_test.go | 87 +++++++++++++++++ core/stringx/node_test.go | 172 ++++++++++++++++++++++++++++++++- core/stringx/replacer_test.go | 4 +- 4 files changed, 296 insertions(+), 40 deletions(-) create mode 100644 core/stringx/node_fuzz_test.go diff --git a/core/stringx/node.go b/core/stringx/node.go index e11eb686..867afd94 100644 --- a/core/stringx/node.go +++ b/core/stringx/node.go @@ -36,32 +36,30 @@ func (n *node) add(word string) { } func (n *node) build() { - n.fail = n + var nodes []*node for _, child := range n.children { child.fail = n - n.buildNode(child) + nodes = append(nodes, child) } -} - -func (n *node) buildNode(nd *node) { - if nd.children == nil { - return - } - - var fifo []*node - for key, child := range nd.children { - fifo = append(fifo, child) - - if fail, ok := nd.fail.children[key]; ok { - child.fail = fail - } else { - child.fail = n + for len(nodes) > 0 { + nd := nodes[0] + nodes = nodes[1:] + for key, child := range nd.children { + nodes = append(nodes, child) + cur := nd + for cur != nil { + if cur.fail == nil { + child.fail = n + break + } + if fail, ok := cur.fail.children[key]; ok { + child.fail = fail + break + } + cur = cur.fail + } } } - - for _, val := range fifo { - n.buildNode(val) - } } func (n *node) find(chars []rune) []scope { @@ -73,27 +71,28 @@ func (n *node) find(chars []rune) []scope { child, ok := cur.children[chars[i]] if ok { cur = child - } else if cur == n { - continue } else { - cur = cur.fail - if child, ok = cur.children[chars[i]]; !ok { + for cur != n { + cur = cur.fail + if child, ok = cur.children[chars[i]]; ok { + cur = child + break + } + } + + if child == nil { continue } - cur = child } - if child.end { - scopes = append(scopes, scope{ - start: i + 1 - child.depth, - stop: i + 1, - }) - } - if child.fail != n && child.fail.end { - scopes = append(scopes, scope{ - start: i + 1 - child.fail.depth, - stop: i + 1, - }) + for child != n { + if child.end { + scopes = append(scopes, scope{ + start: i + 1 - child.depth, + stop: i + 1, + }) + } + child = child.fail } } diff --git a/core/stringx/node_fuzz_test.go b/core/stringx/node_fuzz_test.go new file mode 100644 index 00000000..8c269513 --- /dev/null +++ b/core/stringx/node_fuzz_test.go @@ -0,0 +1,87 @@ +//go:build go1.18 +// +build go1.18 + +package stringx + +import ( + "fmt" + "math/rand" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func FuzzNodeFind(f *testing.F) { + rand.Seed(time.Now().UnixNano()) + + f.Add(10) + f.Fuzz(func(t *testing.T, keys int) { + str := Randn(rand.Intn(100) + 50) + keywords := make(map[string]struct{}) + for i := 0; i < keys; i++ { + keyword := Randn(rand.Intn(10) + 5) + if !strings.Contains(str, keyword) { + keywords[keyword] = struct{}{} + } + } + + size := len(str) + var scopes []scope + var n node + for i := 0; i < size%20; i++ { + start := rand.Intn(size) + stop := start + rand.Intn(20) + 1 + if stop > size { + stop = size + } + if start == stop { + continue + } + + keyword := str[start:stop] + if _, ok := keywords[keyword]; ok { + continue + } + + keywords[keyword] = struct{}{} + var pos int + for pos <= len(str)-len(keyword) { + val := str[pos:] + p := strings.Index(val, keyword) + if p < 0 { + break + } + + scopes = append(scopes, scope{ + start: pos + p, + stop: pos + p + len(keyword), + }) + pos += p + 1 + } + } + + for keyword := range keywords { + n.add(keyword) + } + n.build() + + var buf strings.Builder + buf.WriteString("keywords:\n") + for key := range keywords { + fmt.Fprintf(&buf, "\t%q,\n", key) + } + buf.WriteString("scopes:\n") + for _, scp := range scopes { + fmt.Fprintf(&buf, "\t{%d, %d},\n", scp.start, scp.stop) + } + fmt.Fprintf(&buf, "text:\n\t%s\n", str) + defer func() { + if r := recover(); r != nil { + t.Errorf(buf.String()) + } + }() + assert.ElementsMatchf(t, scopes, n.find([]rune(str)), buf.String()) + }) +} diff --git a/core/stringx/node_test.go b/core/stringx/node_test.go index c3dcba9b..f9b8d1d0 100644 --- a/core/stringx/node_test.go +++ b/core/stringx/node_test.go @@ -1,6 +1,176 @@ package stringx -import "testing" +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFuzzNodeCase1(t *testing.T) { + keywords := []string{ + "cs8Zh", + "G1OihlVuBz", + "K6azS2FBHjI", + "DQKvghI4", + "l7bA86Sze", + "tjBLZhCao", + "nEsXmVzP", + "cbRh8UE1nO3s", + "Wta3R2WcbGP", + "jpOIcA", + "TtkRr4k9hI", + "OKbSo0clAYTtk", + "uJs1WToEanlKV", + "05Y02iFD2", + "x2uJs1WToEanlK", + "ieaSWe", + "Kg", + "FD2bCKFazH", + } + scopes := []scope{ + {62, 72}, + {52, 65}, + {21, 34}, + {1, 10}, + {19, 33}, + {36, 42}, + {42, 44}, + {7, 17}, + } + n := new(node) + for _, key := range keywords { + n.add(key) + } + n.build() + assert.ElementsMatch(t, scopes, n.find([]rune("Z05Y02iFD2bCKFazHtrx2uJs1WToEanlKVWKieaSWeKgmnUXV0ZjOKbSo0clAYTtkRr4k9hI"))) +} + +func TestFuzzNodeCase2(t *testing.T) { + keywords := []string{ + "IP1NPsJKIvt", + "Iw7hQARwSTw", + "SmZIcA", + "OyxHPYkoQzFO", + "3suCnuSAS5d", + "HUMpbi", + "HPdvbGGpY", + "49qjMtR8bEt", + "a0zrrGKVTJ2", + "WbOBcszeo1FfZ", + "8tHUi5PJI", + "Oa2Di", + "6ZWa5nr1tU", + "o0LJRfmeXB9bF9", + "veF0ehKxH", + "Qp73r", + "B6Rmds4ELY8", + "uNpGybQZG", + "Ogm3JqicRZlA4n", + "FL6LVErKomc84H", + "qv2Pi0xJj3cR1", + "bPWLBg4", + "hYN8Q4M1sw", + "ExkTgNklmlIx", + "eVgHHDOxOUEj", + "5WPEVv0tR", + "CPjnOAqUZgV", + "oR3Ogtz", + "jwk1Zbg", + "DYqguyk8h", + "rieroDmpvYFK", + "MQ9hZnMjDqrNQe", + "EhM4KqkCBd", + "m9xalj6q", + "d5CTL5mzK", + "XJOoTvFtI8U", + "iFAwspJ", + "iGv8ErnRZIuSWX", + "i8C1BqsYX", + "vXN1KOaOgU", + "GHJFB", + "Y6OlAqbZxYG8", + "dzd4QscSih4u", + "SsLYMkKvB9APx", + "gi0huB3", + "CMICHDCSvSrgiACXVkN", + "MwOvyHbaxdaqpZpU", + "wOvyHbaxdaqpZpUbI", + "2TT5WEy", + "eoCq0T2MC", + "ZpUbI7", + "oCq0T2MCp", + "CpLFgLg0g", + "FgLg0gh", + "w5awC5HeoCq", + "1c", + } + scopes := []scope{ + {0, 19}, + {57, 73}, + {58, 75}, + {47, 54}, + {29, 38}, + {70, 76}, + {30, 39}, + {37, 46}, + {40, 47}, + {22, 33}, + {92, 94}, + } + n := new(node) + for _, key := range keywords { + n.add(key) + } + n.build() + assert.ElementsMatch(t, scopes, n.find([]rune("CMICHDCSvSrgiACXVkNF9lw5awC5HeoCq0T2MCpLFgLg0gh2TT5WEyINrMwOvyHbaxdaqpZpUbI7SpIY5yVWf33MuX7K1c"))) +} + +func TestFuzzNodeCase3(t *testing.T) { + keywords := []string{ + "QAraACKOftI4", + "unRmd2EO0", + "s25OtuoU", + "aGlmn7KnbE4HCX", + "kuK6Uh", + "ckuK6Uh", + "uK6Uh", + "Iy", + "h", + "PMSSUNvyi", + "ahz0i", + "Lhs4XZ1e", + "shPp1Va7aQNVme", + "yIUckuK6Uh", + "pKjIyI", + "jIyIUckuK6Uh", + "UckuK6Uh", + "Uh", + "JPAULjQgHJ", + "Wp", + "sbkZxXurrI", + "pKjIyIUckuK6Uh", + } + scopes := []scope{ + {9, 15}, + {8, 15}, + {5, 15}, + {1, 7}, + {10, 15}, + {3, 15}, + {0, 2}, + {1, 15}, + {7, 15}, + {13, 15}, + {4, 6}, + {14, 15}, + } + n := new(node) + for _, key := range keywords { + n.add(key) + } + n.build() + assert.ElementsMatch(t, scopes, n.find([]rune("WpKjIyIUckuK6Uh"))) +} func BenchmarkNodeFind(b *testing.B) { b.ReportAllocs() diff --git a/core/stringx/replacer_test.go b/core/stringx/replacer_test.go index 08c2661f..8cb8851b 100644 --- a/core/stringx/replacer_test.go +++ b/core/stringx/replacer_test.go @@ -83,7 +83,7 @@ func TestReplacer_ReplaceEmpty(t *testing.T) { assert.Equal(t, "", NewReplacer(mapping).Replace("")) } -func TestFuzzCase1(t *testing.T) { +func TestFuzzReplacerCase1(t *testing.T) { keywords := map[string]string{ "yQyJykiqoh": "xw", "tgN70z": "Q2P", @@ -115,7 +115,7 @@ func TestFuzzCase1(t *testing.T) { } } -func TestFuzzCase2(t *testing.T) { +func TestFuzzReplacerCase2(t *testing.T) { keywords := map[string]string{ "dmv2SGZvq9Yz": "TE", "rCL5DRI9uFP8": "hvsc8",