Add grpc retry (#1160)
* Add grpc retry
* Update grpc retry
* Add tests
* Fix a bug
* Add api && some tests
* Add comment
* Add double check
* Add server retry quota
* Update optimize code
* Fix bug
* Update optimize code
* Update optimize code
* Fix bug
parent 496a2f341e
commit 462ddbb145
@@ -0,0 +1,31 @@
package backoff

import (
	"math/rand"
	"time"
)

type Func func(attempt int) time.Duration

// LinearWithJitter waits a set period of time, allowing for jitter (fractional adjustment).
func LinearWithJitter(waitBetween time.Duration, jitterFraction float64) Func {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	return func(attempt int) time.Duration {
		multiplier := jitterFraction * (r.Float64()*2 - 1)
		return time.Duration(float64(waitBetween) * (1 + multiplier))
	}
}

// Interval waits a fixed period of time between calls.
func Interval(interval time.Duration) Func {
	return func(attempt int) time.Duration {
		return interval
	}
}

// Exponential produces increasing intervals for each attempt.
func Exponential(scalar time.Duration) Func {
	return func(attempt int) time.Duration {
		return scalar * time.Duration((1<<attempt)>>1)
	}
}
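A quick illustration of what these backoff functions produce (not part of the commit; a minimal standalone sketch using only the functions above and the import path shown in the diff):

package main

import (
	"fmt"
	"time"

	"github.com/tal-tech/go-zero/core/retry/backoff"
)

func main() {
	interval := backoff.Interval(100 * time.Millisecond)
	exp := backoff.Exponential(100 * time.Millisecond)

	fmt.Println(interval(3)) // 100ms, regardless of the attempt number
	fmt.Println(exp(3))      // 100ms * ((1<<3)>>1) = 400ms
}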
@@ -0,0 +1,17 @@
package backoff

import (
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

func TestWaitBetween(t *testing.T) {
	fn := Interval(time.Second)
	assert.EqualValues(t, time.Second, fn(1))
}

func TestExponential(t *testing.T) {
	fn := Exponential(time.Second)
	assert.EqualValues(t, time.Second, fn(1))
}
@@ -0,0 +1,42 @@
package retry

import (
	"time"

	"github.com/tal-tech/go-zero/core/retry/backoff"
	"google.golang.org/grpc/codes"
)

// WithDisable disables the retry behaviour on this call, or this interceptor.
//
// It is semantically the same as calling `WithMax(0)`.
func WithDisable() *CallOption {
	return WithMax(0)
}

// WithMax sets the maximum number of retries on this call, or this interceptor.
func WithMax(maxRetries int) *CallOption {
	return &CallOption{apply: func(options *options) {
		options.max = maxRetries
	}}
}

// WithBackoff sets the `backoff.Func` used to control time between retries.
func WithBackoff(backoffFunc backoff.Func) *CallOption {
	return &CallOption{apply: func(o *options) {
		o.backoffFunc = backoffFunc
	}}
}

// WithCodes sets which gRPC codes should be retried.
func WithCodes(retryCodes ...codes.Code) *CallOption {
	return &CallOption{apply: func(o *options) {
		o.codes = retryCodes
	}}
}

// WithPerRetryTimeout sets the timeout applied to each retry attempt.
func WithPerRetryTimeout(timeout time.Duration) *CallOption {
	return &CallOption{apply: func(o *options) {
		o.perCallTimeout = timeout
	}}
}
@@ -0,0 +1,92 @@
package retry

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/tal-tech/go-zero/core/logx"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/status"
)

func TestRetryWithDisable(t *testing.T) {
	opt := &options{}
	assert.EqualValues(t, &options{}, parseRetryCallOptions(opt, WithDisable()))
}

func TestRetryWithMax(t *testing.T) {
	n := 5
	for i := 0; i < n; i++ {
		opt := &options{}
		assert.EqualValues(t, &options{max: i}, parseRetryCallOptions(opt, WithMax(i)))
	}
}

func TestRetryWithBackoff(t *testing.T) {
	opt := &options{}
	retryCallOptions := parseRetryCallOptions(opt, WithBackoff(func(attempt int) time.Duration {
		return time.Millisecond
	}))
	assert.EqualValues(t, time.Millisecond, retryCallOptions.backoffFunc(1))
}

func TestRetryWithCodes(t *testing.T) {
	opt := &options{}
	c := []codes.Code{codes.Unknown, codes.NotFound}
	options := parseRetryCallOptions(opt, WithCodes(c...))
	assert.EqualValues(t, c, options.codes)
}

func TestRetryWithPerRetryTimeout(t *testing.T) {
	opt := &options{}
	options := parseRetryCallOptions(opt, WithPerRetryTimeout(time.Millisecond))
	assert.EqualValues(t, time.Millisecond, options.perCallTimeout)
}

func Test_waitRetryBackoff(t *testing.T) {
	opt := &options{perCallTimeout: time.Second, backoffFunc: func(attempt int) time.Duration {
		return time.Second
	}}
	logger := logx.WithContext(context.Background())
	err := waitRetryBackoff(logger, 1, context.Background(), opt)
	assert.NoError(t, err)

	ctx, cancelFunc := context.WithTimeout(context.Background(), time.Millisecond)
	defer cancelFunc()
	err = waitRetryBackoff(logger, 1, ctx, opt)
	assert.ErrorIs(t, err, status.FromContextError(context.DeadlineExceeded).Err())
}

func Test_isRetriable(t *testing.T) {
	assert.False(t, isRetriable(status.FromContextError(context.DeadlineExceeded).Err(), &options{codes: DefaultRetriableCodes}))
	assert.True(t, isRetriable(status.Error(codes.ResourceExhausted, ""), &options{codes: DefaultRetriableCodes}))
	assert.False(t, isRetriable(errors.New("error"), &options{}))
}

func Test_perCallContext(t *testing.T) {
	opt := &options{perCallTimeout: time.Second, includeRetryHeader: true}
	ctx := metadata.NewIncomingContext(context.Background(), map[string][]string{"1": {"1"}})
	callContext := perCallContext(ctx, opt, 1)
	md, ok := metadata.FromOutgoingContext(callContext)
	assert.True(t, ok)
	assert.EqualValues(t, metadata.MD{"1": {"1"}, AttemptMetadataKey: {"1"}}, md)
}

func Test_filterCallOptions(t *testing.T) {
	grpcEmptyCallOpt := &grpc.EmptyCallOption{}
	retryCallOpt := &CallOption{}
	options, retryCallOptions := filterCallOptions([]grpc.CallOption{
		grpcEmptyCallOpt,
		retryCallOpt,
	})
	assert.EqualValues(t, []grpc.CallOption{grpcEmptyCallOpt}, options)
	assert.EqualValues(t, []*CallOption{retryCallOpt}, retryCallOptions)
}
@@ -0,0 +1,179 @@
package retry

import (
	"context"
	"strconv"
	"time"

	"github.com/tal-tech/go-zero/core/logx"
	"github.com/tal-tech/go-zero/core/retry/backoff"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/status"
)

const AttemptMetadataKey = "x-retry-attempt"

var (
	// DefaultRetriableCodes is the default set of retriable codes.
	DefaultRetriableCodes = []codes.Code{codes.ResourceExhausted, codes.Unavailable}
	// defaultRetryOptions is the default retry configuration.
	defaultRetryOptions = &options{
		max:                0, // disabled
		perCallTimeout:     0, // disabled
		includeRetryHeader: true,
		codes:              DefaultRetriableCodes,
		backoffFunc:        backoff.LinearWithJitter(50*time.Millisecond /*jitter*/, 0.10),
	}
)

type (
	// options is the retry configuration.
	options struct {
		max                int
		perCallTimeout     time.Duration
		includeRetryHeader bool
		codes              []codes.Code
		backoffFunc        backoff.Func
	}
	// CallOption is a grpc.CallOption that is local to grpc retry.
	CallOption struct {
		grpc.EmptyCallOption // make sure we implement private after() and before() methods so we don't panic.
		apply func(opt *options)
	}
)

func waitRetryBackoff(logger logx.Logger, attempt int, ctx context.Context, retryOptions *options) error {
	var waitTime time.Duration
	if attempt > 0 {
		waitTime = retryOptions.backoffFunc(attempt)
	}
	if waitTime > 0 {
		timer := time.NewTimer(waitTime)
		logger.Infof("grpc retry attempt: %d, backoff for %v", attempt, waitTime)
		select {
		case <-ctx.Done():
			timer.Stop()
			return status.FromContextError(ctx.Err()).Err()
		case <-timer.C:
			// double check that the context was not canceled while we were waiting
			err := ctx.Err()
			if err != nil {
				return status.FromContextError(err).Err()
			}
		}
	}

	return nil
}

func isRetriable(err error, retryOptions *options) bool {
	errCode := status.Code(err)
	if isContextError(err) {
		return false
	}

	for _, code := range retryOptions.codes {
		if code == errCode {
			return true
		}
	}

	return false
}

func isContextError(err error) bool {
	code := status.Code(err)
	return code == codes.DeadlineExceeded || code == codes.Canceled
}

func reuseOrNewWithCallOptions(opt *options, retryCallOptions []*CallOption) *options {
	if len(retryCallOptions) == 0 {
		return opt
	}

	return parseRetryCallOptions(opt, retryCallOptions...)
}

func parseRetryCallOptions(opt *options, opts ...*CallOption) *options {
	for _, option := range opts {
		option.apply(opt)
	}

	return opt
}

func perCallContext(ctx context.Context, callOpts *options, attempt int) context.Context {
	if attempt > 0 {
		if callOpts.perCallTimeout != 0 {
			var cancel context.CancelFunc
			ctx, cancel = context.WithTimeout(ctx, callOpts.perCallTimeout)
			_ = cancel
		}
		if callOpts.includeRetryHeader {
			cloneMd := extractIncomingAndClone(ctx)
			cloneMd.Set(AttemptMetadataKey, strconv.Itoa(attempt))
			ctx = metadata.NewOutgoingContext(ctx, cloneMd)
		}
	}

	return ctx
}

func extractIncomingAndClone(ctx context.Context) metadata.MD {
	md, ok := metadata.FromIncomingContext(ctx)
	if !ok {
		return metadata.MD{}
	}

	// clone
	return md.Copy()
}

func filterCallOptions(callOptions []grpc.CallOption) (grpcOptions []grpc.CallOption, retryOptions []*CallOption) {
	for _, opt := range callOptions {
		if co, ok := opt.(*CallOption); ok {
			retryOptions = append(retryOptions, co)
		} else {
			grpcOptions = append(grpcOptions, opt)
		}
	}

	return grpcOptions, retryOptions
}

func Do(ctx context.Context, call func(ctx context.Context, opts ...grpc.CallOption) error, opts ...grpc.CallOption) error {
	logger := logx.WithContext(ctx)
	grpcOpts, retryOpts := filterCallOptions(opts)
	callOpts := reuseOrNewWithCallOptions(defaultRetryOptions, retryOpts)

	if callOpts.max == 0 {
		return call(ctx, opts...)
	}

	var lastErr error
	for attempt := 0; attempt <= callOpts.max; attempt++ {
		if err := waitRetryBackoff(logger, attempt, ctx, callOpts); err != nil {
			return err
		}

		callCtx := perCallContext(ctx, callOpts, attempt)
		lastErr = call(callCtx, grpcOpts...)

		if lastErr == nil {
			return nil
		}
		if attempt == 0 {
			logger.Errorf("grpc call failed, got err: %v", lastErr)
		} else {
			logger.Errorf("grpc retry attempt: %d, got err: %v", attempt, lastErr)
		}
		if isContextError(lastErr) {
			if ctx.Err() != nil {
				logger.Errorf("grpc retry attempt: %d, parent context error: %v", attempt, ctx.Err())
				return lastErr
			} else if callOpts.perCallTimeout != 0 {
				logger.Errorf("grpc retry attempt: %d, context error from retry call", attempt)
				continue
			}
		}
		if !isRetriable(lastErr, callOpts) {
			return lastErr
		}
	}

	return lastErr
}
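For orientation, here is a minimal, self-contained sketch (not part of the commit) of how Do composes with the call options above. The failing closure merely stands in for a real stub invocation; only the functions shown in this diff are used:

package main

import (
	"context"
	"log"
	"time"

	"github.com/tal-tech/go-zero/core/retry"
	"github.com/tal-tech/go-zero/core/retry/backoff"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

func main() {
	// In real code the closure would call a generated client stub, forwarding opts;
	// here it always fails with a retriable code so Do keeps retrying.
	err := retry.Do(context.Background(), func(ctx context.Context, opts ...grpc.CallOption) error {
		return status.Error(codes.Unavailable, "server busy")
	},
		retry.WithMax(3), // first attempt plus up to 3 retries
		retry.WithPerRetryTimeout(200*time.Millisecond),
		retry.WithCodes(codes.Unavailable),
		retry.WithBackoff(backoff.Exponential(50*time.Millisecond)),
	)
	log.Println(err) // rpc error: code = Unavailable desc = server busy
}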
@@ -0,0 +1,25 @@
package retry

import (
	"context"
	"testing"

	"github.com/stretchr/testify/assert"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

func TestDo(t *testing.T) {
	n := 4
	for i := 0; i < n; i++ {
		count := 0
		err := Do(context.Background(), func(ctx context.Context, opts ...grpc.CallOption) error {
			count++
			return status.Error(codes.ResourceExhausted, "ResourceExhausted")
		}, WithMax(i))
		assert.Error(t, err)
		assert.Equal(t, i+1, count)
	}
}
@@ -0,0 +1,19 @@
package clientinterceptors

import (
	"context"

	"github.com/tal-tech/go-zero/core/retry"
	"google.golang.org/grpc"
)

// RetryInterceptor returns a grpc.UnaryClientInterceptor that retries failed calls when enable is true.
func RetryInterceptor(enable bool) grpc.UnaryClientInterceptor {
	return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
		if !enable {
			return invoker(ctx, method, req, reply, cc, opts...)
		}
		return retry.Do(ctx, func(ctx context.Context, callOpts ...grpc.CallOption) error {
			return invoker(ctx, method, req, reply, cc, callOpts...)
		}, opts...)
	}
}
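A minimal sketch (not part of the commit, and written as if it lived in the same package, since the diff does not show file paths) of plugging this interceptor into a dial with the standard grpc.WithUnaryInterceptor option:

// Sketch only: assumes it sits alongside RetryInterceptor in package clientinterceptors.
func dialWithRetry(target string) (*grpc.ClientConn, error) {
	return grpc.Dial(target,
		grpc.WithInsecure(), // insecure only to keep the sketch short
		grpc.WithUnaryInterceptor(RetryInterceptor(true)),
	)
}

With the interceptor installed, per-call options such as retry.WithMax or retry.WithCodes can be passed as grpc.CallOptions on individual stub calls; filterCallOptions separates them from the regular gRPC options.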
@@ -0,0 +1,27 @@
package clientinterceptors

import (
	"context"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/tal-tech/go-zero/core/retry"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

func TestRetryInterceptor_WithMax(t *testing.T) {
	n := 4
	for i := 0; i < n; i++ {
		count := 0
		cc := new(grpc.ClientConn)
		err := RetryInterceptor(true)(context.Background(), "/1", nil, nil, cc,
			func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, opts ...grpc.CallOption) error {
				count++
				return status.Error(codes.ResourceExhausted, "ResourceExhausted")
			}, retry.WithMax(i))
		assert.Error(t, err)
		assert.Equal(t, i+1, count)
	}
}
@@ -0,0 +1,33 @@
package serverinterceptors

import (
	"context"
	"strconv"

	"github.com/tal-tech/go-zero/core/logx"
	"github.com/tal-tech/go-zero/core/retry"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/status"
)

func RetryInterceptor(maxAttempt int) grpc.UnaryServerInterceptor {
	return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
		var md metadata.MD
		requestMd, ok := metadata.FromIncomingContext(ctx)
		if ok {
			md = requestMd.Copy()
			attemptMd := md.Get(retry.AttemptMetadataKey)
			if len(attemptMd) != 0 && attemptMd[0] != "" {
				if attempt, err := strconv.Atoi(attemptMd[0]); err == nil {
					if attempt > maxAttempt {
						logx.WithContext(ctx).Errorf("retries exceeded: %d, max retries: %d", attempt, maxAttempt)
						return nil, status.Error(codes.FailedPrecondition, "Retries exceeded")
					}
				}
			}
		}

		return handler(ctx, req)
	}
}
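A minimal sketch (again not part of the commit, and assumed to sit in the same package since file paths are not shown) of registering the server-side retry quota check with the standard grpc.UnaryInterceptor server option:

// Sketch only: assumes it sits alongside RetryInterceptor in package serverinterceptors.
func newServerWithRetryQuota() *grpc.Server {
	// Reject any request whose x-retry-attempt metadata reports more than 2 retries.
	return grpc.NewServer(grpc.UnaryInterceptor(RetryInterceptor(2)))
}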
@@ -0,0 +1,40 @@
package serverinterceptors

import (
	"context"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/tal-tech/go-zero/core/retry"
	"google.golang.org/grpc/metadata"
)

func TestRetryInterceptor(t *testing.T) {
	t.Run("retries exceeded", func(t *testing.T) {
		interceptor := RetryInterceptor(2)
		ctx := metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{retry.AttemptMetadataKey: "3"}))
		resp, err := interceptor(ctx, nil, nil, func(ctx context.Context, req interface{}) (interface{}, error) {
			return nil, nil
		})
		assert.Error(t, err)
		assert.Nil(t, resp)
	})

	t.Run("reasonable retries", func(t *testing.T) {
		interceptor := RetryInterceptor(2)
		ctx := metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{retry.AttemptMetadataKey: "2"}))
		resp, err := interceptor(ctx, nil, nil, func(ctx context.Context, req interface{}) (interface{}, error) {
			return nil, nil
		})
		assert.NoError(t, err)
		assert.Nil(t, resp)
	})

	t.Run("no retries", func(t *testing.T) {
		interceptor := RetryInterceptor(0)
		resp, err := interceptor(context.Background(), nil, nil, func(ctx context.Context, req interface{}) (interface{}, error) {
			return nil, nil
		})
		assert.NoError(t, err)
		assert.Nil(t, resp)
	})
}