|
|
|
package bloom
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"strconv"
|
|
|
|
|
|
|
|
"github.com/zeromicro/go-zero/core/hash"
|
|
|
|
"github.com/zeromicro/go-zero/core/stores/redis"
|
|
|
|
)
|
|
|
|
|
|
|
|
// maps is the number of hash functions applied to every element, i.e. the
// value k in the error rate table published at
// http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
const maps = 14
|
|
|
|
|
|
|
|
var (
|
|
|
|
// ErrTooLargeOffset indicates the offset is too large in bitset.
|
|
|
|
ErrTooLargeOffset = errors.New("too large offset")
|
|
|
|
|
|
|
|
setScript = redis.NewScript(`
|
|
|
|
for _, offset in ipairs(ARGV) do
|
|
|
|
redis.call("setbit", KEYS[1], offset, 1)
|
|
|
|
end
|
|
|
|
`)
|
|
|
|
testScript = redis.NewScript(`
|
|
|
|
for _, offset in ipairs(ARGV) do
|
|
|
|
if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
|
|
|
|
return false
|
|
|
|
end
|
|
|
|
end
|
|
|
|
return true
|
|
|
|
`)
|
|
|
|
)
|
|
|
|
|
|
|
|
// A Filter is a bloom filter.
type Filter struct {
	// bits is the size of the filter in bits; hash values are reduced
	// modulo this number to obtain bit offsets.
	bits uint
	// bitSet is the storage that holds the bits.
	bitSet bitSetProvider
}

// bitSetProvider is the bit storage a Filter reads from and writes to.
type bitSetProvider interface {
	// check reports whether the bits at the given offsets are set.
	check(ctx context.Context, offsets []uint) (bool, error)
	// set turns on the bits at the given offsets.
	set(ctx context.Context, offsets []uint) error
}
|
|
|
|
|
|
|
|
// New create a Filter, store is the backed redis, key is the key for the bloom filter,
|
|
|
|
// bits is how many bits will be used, maps is how many hashes for each addition.
|
|
|
|
// best practices:
|
|
|
|
// elements - means how many actual elements
|
|
|
|
// when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
|
|
|
|
// for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
|
|
|
|
func New(store *redis.Redis, key string, bits uint) *Filter {
|
|
|
|
return &Filter{
|
|
|
|
bits: bits,
|
|
|
|
bitSet: newRedisBitSet(store, key, bits),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add adds data into f.
|
|
|
|
func (f *Filter) Add(data []byte) error {
|
|
|
|
return f.AddCtx(context.Background(), data)
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddCtx adds data into f with context.
|
|
|
|
func (f *Filter) AddCtx(ctx context.Context, data []byte) error {
|
|
|
|
locations := f.getLocations(data)
|
|
|
|
return f.bitSet.set(ctx, locations)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Exists checks if data is in f.
|
|
|
|
func (f *Filter) Exists(data []byte) (bool, error) {
|
|
|
|
return f.ExistsCtx(context.Background(), data)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ExistsCtx checks if data is in f with context.
|
|
|
|
func (f *Filter) ExistsCtx(ctx context.Context, data []byte) (bool, error) {
|
|
|
|
locations := f.getLocations(data)
|
|
|
|
isSet, err := f.bitSet.check(ctx, locations)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return isSet, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f *Filter) getLocations(data []byte) []uint {
|
|
|
|
locations := make([]uint, maps)
|
|
|
|
for i := uint(0); i < maps; i++ {
|
|
|
|
hashValue := hash.Hash(append(data, byte(i)))
|
|
|
|
locations[i] = uint(hashValue % uint64(f.bits))
|
|
|
|
}
|
|
|
|
|
|
|
|
return locations
|
|
|
|
}
|
|
|
|
|
|
|
|
type redisBitSet struct {
|
|
|
|
store *redis.Redis
|
|
|
|
key string
|
|
|
|
bits uint
|
|
|
|
}
|
|
|
|
|
|
|
|
func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
|
|
|
|
return &redisBitSet{
|
|
|
|
store: store,
|
|
|
|
key: key,
|
|
|
|
bits: bits,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
|
|
|
|
var args []string
|
|
|
|
|
|
|
|
for _, offset := range offsets {
|
|
|
|
if offset >= r.bits {
|
|
|
|
return nil, ErrTooLargeOffset
|
|
|
|
}
|
|
|
|
|
|
|
|
args = append(args, strconv.FormatUint(uint64(offset), 10))
|
|
|
|
}
|
|
|
|
|
|
|
|
return args, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *redisBitSet) check(ctx context.Context, offsets []uint) (bool, error) {
|
|
|
|
args, err := r.buildOffsetArgs(offsets)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := r.store.ScriptRunCtx(ctx, testScript, []string{r.key}, args)
|
|
|
|
if errors.Is(err, redis.Nil) {
|
|
|
|
return false, nil
|
|
|
|
} else if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
exists, ok := resp.(int64)
|
|
|
|
if !ok {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return exists == 1, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// del only use for testing.
|
|
|
|
func (r *redisBitSet) del() error {
|
|
|
|
_, err := r.store.Del(r.key)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// expire only use for testing.
|
|
|
|
func (r *redisBitSet) expire(seconds int) error {
|
|
|
|
return r.store.Expire(r.key, seconds)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *redisBitSet) set(ctx context.Context, offsets []uint) error {
|
|
|
|
args, err := r.buildOffsetArgs(offsets)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = r.store.ScriptRunCtx(ctx, setScript, []string{r.key}, args)
|
|
|
|
if errors.Is(err, redis.Nil) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|