|
|
|
package mr
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/tal-tech/go-zero/core/errorx"
|
|
|
|
"github.com/tal-tech/go-zero/core/lang"
|
|
|
|
"github.com/tal-tech/go-zero/core/syncx"
|
|
|
|
"github.com/tal-tech/go-zero/core/threading"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Limits on the number of concurrent mapper workers.
const (
	// defaultWorkers is the worker count used when no Option overrides it.
	defaultWorkers = 16
	// minWorkers is the lowest worker count WithWorkers will configure.
	minWorkers = 1
)
|
|
|
|
|
|
|
|
// Sentinel errors returned by the map/reduce entry points.
var (
	// ErrCancelWithNil is returned when the cancel callback was invoked with a nil error.
	ErrCancelWithNil = errors.New("mapreduce cancelled with nil")
	// ErrReduceNoOutput is returned when the reducer finished without writing a value.
	ErrReduceNoOutput = errors.New("reduce not writing value")
)
|
|
|
|
|
|
|
|
type (
	// GenerateFunc produces the input items by sending them on source.
	GenerateFunc func(source chan<- interface{})

	// MapFunc handles a single item and may emit results through writer.
	MapFunc func(item interface{}, writer Writer)

	// VoidMapFunc handles a single item without emitting any result.
	VoidMapFunc func(item interface{})

	// MapperFunc handles a single item, may emit results through writer,
	// and may abort the whole run by calling cancel.
	MapperFunc func(item interface{}, writer Writer, cancel func(error))

	// ReducerFunc consumes the mapped values from pipe, reports its result
	// through writer, and may abort the run by calling cancel.
	ReducerFunc func(pipe <-chan interface{}, writer Writer, cancel func(error))

	// VoidReducerFunc consumes the mapped values from pipe without
	// producing a result; it may abort the run by calling cancel.
	VoidReducerFunc func(pipe <-chan interface{}, cancel func(error))

	// Option customizes the settings of a map/reduce run.
	Option func(opts *mapReduceOptions)

	// mapReduceOptions holds the tunable settings of a map/reduce run.
	mapReduceOptions struct {
		// workers is the number of mappers allowed to run concurrently.
		workers int
	}

	// Writer is the sink that mappers and reducers emit values to.
	Writer interface {
		// Write emits v.
		Write(v interface{})
	}
)
|
|
|
|
|
|
|
|
func Finish(fns ...func() error) error {
|
|
|
|
if len(fns) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return MapReduceVoid(func(source chan<- interface{}) {
|
|
|
|
for _, fn := range fns {
|
|
|
|
source <- fn
|
|
|
|
}
|
|
|
|
}, func(item interface{}, writer Writer, cancel func(error)) {
|
|
|
|
fn := item.(func() error)
|
|
|
|
if err := fn(); err != nil {
|
|
|
|
cancel(err)
|
|
|
|
}
|
|
|
|
}, func(pipe <-chan interface{}, cancel func(error)) {
|
|
|
|
drain(pipe)
|
|
|
|
}, WithWorkers(len(fns)))
|
|
|
|
}
|
|
|
|
|
|
|
|
func FinishVoid(fns ...func()) {
|
|
|
|
if len(fns) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
MapVoid(func(source chan<- interface{}) {
|
|
|
|
for _, fn := range fns {
|
|
|
|
source <- fn
|
|
|
|
}
|
|
|
|
}, func(item interface{}) {
|
|
|
|
fn := item.(func())
|
|
|
|
fn()
|
|
|
|
}, WithWorkers(len(fns)))
|
|
|
|
}
|
|
|
|
|
|
|
|
func Map(generate GenerateFunc, mapper MapFunc, opts ...Option) chan interface{} {
|
|
|
|
options := buildOptions(opts...)
|
|
|
|
source := buildSource(generate)
|
|
|
|
collector := make(chan interface{}, options.workers)
|
|
|
|
done := syncx.NewDoneChan()
|
|
|
|
|
|
|
|
go mapDispatcher(mapper, source, collector, done.Done(), options.workers)
|
|
|
|
|
|
|
|
return collector
|
|
|
|
}
|
|
|
|
|
|
|
|
func MapReduce(generate GenerateFunc, mapper MapperFunc, reducer ReducerFunc, opts ...Option) (interface{}, error) {
|
|
|
|
source := buildSource(generate)
|
|
|
|
return MapReduceWithSource(source, mapper, reducer, opts...)
|
|
|
|
}
|
|
|
|
|
|
|
|
func MapReduceWithSource(source <-chan interface{}, mapper MapperFunc, reducer ReducerFunc,
|
|
|
|
opts ...Option) (interface{}, error) {
|
|
|
|
options := buildOptions(opts...)
|
|
|
|
output := make(chan interface{})
|
|
|
|
collector := make(chan interface{}, options.workers)
|
|
|
|
done := syncx.NewDoneChan()
|
|
|
|
writer := newGuardedWriter(output, done.Done())
|
|
|
|
var closeOnce sync.Once
|
|
|
|
var retErr errorx.AtomicError
|
|
|
|
finish := func() {
|
|
|
|
closeOnce.Do(func() {
|
|
|
|
done.Close()
|
|
|
|
close(output)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
cancel := once(func(err error) {
|
|
|
|
if err != nil {
|
|
|
|
retErr.Set(err)
|
|
|
|
} else {
|
|
|
|
retErr.Set(ErrCancelWithNil)
|
|
|
|
}
|
|
|
|
|
|
|
|
drain(source)
|
|
|
|
finish()
|
|
|
|
})
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
defer func() {
|
|
|
|
if r := recover(); r != nil {
|
|
|
|
cancel(fmt.Errorf("%v", r))
|
|
|
|
} else {
|
|
|
|
finish()
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
reducer(collector, writer, cancel)
|
|
|
|
}()
|
|
|
|
go mapperDispatcher(mapper, source, collector, done.Done(), cancel, options.workers)
|
|
|
|
|
|
|
|
value, ok := <-output
|
|
|
|
if err := retErr.Load(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
} else if ok {
|
|
|
|
return value, nil
|
|
|
|
} else {
|
|
|
|
return nil, ErrReduceNoOutput
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func MapReduceVoid(generator GenerateFunc, mapper MapperFunc, reducer VoidReducerFunc, opts ...Option) error {
|
|
|
|
_, err := MapReduce(generator, mapper, func(input <-chan interface{}, writer Writer, cancel func(error)) {
|
|
|
|
reducer(input, cancel)
|
|
|
|
// We need to write a placeholder to let MapReduce to continue on reducer done,
|
|
|
|
// otherwise, all goroutines are waiting. The placeholder will be discarded by MapReduce.
|
|
|
|
writer.Write(lang.Placeholder)
|
|
|
|
}, opts...)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
func MapVoid(generate GenerateFunc, mapper VoidMapFunc, opts ...Option) {
|
|
|
|
drain(Map(generate, func(item interface{}, writer Writer) {
|
|
|
|
mapper(item)
|
|
|
|
}, opts...))
|
|
|
|
}
|
|
|
|
|
|
|
|
func WithWorkers(workers int) Option {
|
|
|
|
return func(opts *mapReduceOptions) {
|
|
|
|
if workers < minWorkers {
|
|
|
|
opts.workers = minWorkers
|
|
|
|
} else {
|
|
|
|
opts.workers = workers
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func buildOptions(opts ...Option) *mapReduceOptions {
|
|
|
|
options := newOptions()
|
|
|
|
for _, opt := range opts {
|
|
|
|
opt(options)
|
|
|
|
}
|
|
|
|
|
|
|
|
return options
|
|
|
|
}
|
|
|
|
|
|
|
|
func buildSource(generate GenerateFunc) chan interface{} {
|
|
|
|
source := make(chan interface{})
|
|
|
|
threading.GoSafe(func() {
|
|
|
|
defer close(source)
|
|
|
|
generate(source)
|
|
|
|
})
|
|
|
|
|
|
|
|
return source
|
|
|
|
}
|
|
|
|
|
|
|
|
// drain receives and discards values from channel until it is closed.
func drain(channel <-chan interface{}) {
	for {
		if _, open := <-channel; !open {
			return
		}
	}
}
|
|
|
|
|
|
|
|
func executeMappers(mapper MapFunc, input <-chan interface{}, collector chan<- interface{},
|
|
|
|
done <-chan lang.PlaceholderType, workers int) {
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
defer func() {
|
|
|
|
wg.Wait()
|
|
|
|
close(collector)
|
|
|
|
}()
|
|
|
|
|
|
|
|
pool := make(chan lang.PlaceholderType, workers)
|
|
|
|
writer := newGuardedWriter(collector, done)
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-done:
|
|
|
|
return
|
|
|
|
case pool <- lang.Placeholder:
|
|
|
|
item, ok := <-input
|
|
|
|
if !ok {
|
|
|
|
<-pool
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
// better to safely run caller defined method
|
|
|
|
threading.GoSafe(func() {
|
|
|
|
defer func() {
|
|
|
|
wg.Done()
|
|
|
|
<-pool
|
|
|
|
}()
|
|
|
|
|
|
|
|
mapper(item, writer)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func mapDispatcher(mapper MapFunc, input <-chan interface{}, collector chan<- interface{},
|
|
|
|
done <-chan lang.PlaceholderType, workers int) {
|
|
|
|
executeMappers(func(item interface{}, writer Writer) {
|
|
|
|
mapper(item, writer)
|
|
|
|
}, input, collector, done, workers)
|
|
|
|
}
|
|
|
|
|
|
|
|
func mapperDispatcher(mapper MapperFunc, input <-chan interface{}, collector chan<- interface{},
|
|
|
|
done <-chan lang.PlaceholderType, cancel func(error), workers int) {
|
|
|
|
executeMappers(func(item interface{}, writer Writer) {
|
|
|
|
mapper(item, writer, cancel)
|
|
|
|
}, input, collector, done, workers)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newOptions() *mapReduceOptions {
|
|
|
|
return &mapReduceOptions{
|
|
|
|
workers: defaultWorkers,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// once wraps fn so that only the first call of the returned function invokes
// it; the arguments of later calls are ignored.
func once(fn func(error)) func(error) {
	var guard sync.Once

	return func(err error) {
		guard.Do(func() { fn(err) })
	}
}
|
|
|
|
|
|
|
|
type guardedWriter struct {
|
|
|
|
channel chan<- interface{}
|
|
|
|
done <-chan lang.PlaceholderType
|
|
|
|
}
|
|
|
|
|
|
|
|
func newGuardedWriter(channel chan<- interface{}, done <-chan lang.PlaceholderType) guardedWriter {
|
|
|
|
return guardedWriter{
|
|
|
|
channel: channel,
|
|
|
|
done: done,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gw guardedWriter) Write(v interface{}) {
|
|
|
|
select {
|
|
|
|
case <-gw.done:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
gw.channel <- v
|
|
|
|
}
|
|
|
|
}
|