support customized mask char on trie

master
kevin 4 years ago
parent 2673dbc6e1
commit 2ebb5b6b58

@ -2,7 +2,11 @@ package stringx
import "github.com/tal-tech/go-zero/core/lang" import "github.com/tal-tech/go-zero/core/lang"
const defaultMask = '*'
type ( type (
TrieOption func(trie *trieNode)
Trie interface { Trie interface {
Filter(text string) (string, []string, bool) Filter(text string) (string, []string, bool)
FindKeywords(text string) []string FindKeywords(text string) []string
@ -10,6 +14,7 @@ type (
trieNode struct { trieNode struct {
node node
mask rune
} }
scope struct { scope struct {
@ -18,8 +23,15 @@ type (
} }
) )
func NewTrie(words []string) Trie { func NewTrie(words []string, opts ...TrieOption) Trie {
n := new(trieNode) n := new(trieNode)
for _, opt := range opts {
opt(n)
}
if n.mask == 0 {
n.mask = defaultMask
}
for _, word := range words { for _, word := range words {
n.add(word) n.add(word)
} }
@ -114,6 +126,12 @@ func (n *trieNode) findKeywordScopes(chars []rune) []scope {
func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) { func (n *trieNode) replaceWithAsterisk(chars []rune, start, stop int) {
for i := start; i < stop; i++ { for i := start; i < stop; i++ {
chars[i] = '*' chars[i] = n.mask
}
}
func WithMask(mask rune) TrieOption {
return func(n *trieNode) {
n.mask = mask
} }
} }

@ -109,25 +109,25 @@ func TestTrie(t *testing.T) {
func TestTrieSingleWord(t *testing.T) { func TestTrieSingleWord(t *testing.T) {
trie := NewTrie([]string{ trie := NewTrie([]string{
"闹", "闹",
}) }, WithMask('#'))
output, keywords, ok := trie.Filter("今晚真热闹") output, keywords, ok := trie.Filter("今晚真热闹")
assert.ElementsMatch(t, []string{"闹"}, keywords) assert.ElementsMatch(t, []string{"闹"}, keywords)
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, "今晚真热*", output) assert.Equal(t, "今晚真热#", output)
} }
func TestTrieOverlap(t *testing.T) { func TestTrieOverlap(t *testing.T) {
trie := NewTrie([]string{ trie := NewTrie([]string{
"一二三四五", "一二三四五",
"二三四五六七八", "二三四五六七八",
}) }, WithMask('#'))
output, keywords, ok := trie.Filter("零一二三四五六七八九十") output, keywords, ok := trie.Filter("零一二三四五六七八九十")
assert.ElementsMatch(t, []string{ assert.ElementsMatch(t, []string{
"一二三四五", "一二三四五",
"二三四五六七八", "二三四五六七八",
}, keywords) }, keywords)
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, "零********九十", output) assert.Equal(t, "零########九十", output)
} }
func TestTrieNested(t *testing.T) { func TestTrieNested(t *testing.T) {
@ -135,7 +135,7 @@ func TestTrieNested(t *testing.T) {
"一二三", "一二三",
"一二三四五", "一二三四五",
"一二三四五六七八", "一二三四五六七八",
}) }, WithMask('#'))
output, keywords, ok := trie.Filter("零一二三四五六七八九十") output, keywords, ok := trie.Filter("零一二三四五六七八九十")
assert.ElementsMatch(t, []string{ assert.ElementsMatch(t, []string{
"一二三", "一二三",
@ -143,7 +143,7 @@ func TestTrieNested(t *testing.T) {
"一二三四五六七八", "一二三四五六七八",
}, keywords) }, keywords)
assert.True(t, ok) assert.True(t, ok)
assert.Equal(t, "零********九十", output) assert.Equal(t, "零########九十", output)
} }
func BenchmarkTrie(b *testing.B) { func BenchmarkTrie(b *testing.B) {

Loading…
Cancel
Save