|
|
|
package filex
|
|
|
|
|
|
|
|
import (
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
)
|
|
|
|
|
|
|
|
// OffsetRange describes one contiguous block of content inside a file.
type OffsetRange struct {
	// File is the name of the file the block belongs to.
	File string
	// Start is the byte offset at which the block begins and Stop is the
	// byte offset at which it ends.
	Start, Stop int64
}
|
|
|
|
|
|
|
|
// SplitLineChunks splits file into chunks.
|
|
|
|
// The whole line are guaranteed to be split in the same chunk.
|
|
|
|
func SplitLineChunks(filename string, chunks int) ([]OffsetRange, error) {
|
|
|
|
info, err := os.Stat(filename)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if chunks <= 1 {
|
|
|
|
return []OffsetRange{
|
|
|
|
{
|
|
|
|
File: filename,
|
|
|
|
Start: 0,
|
|
|
|
Stop: info.Size(),
|
|
|
|
},
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
file, err := os.Open(filename)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
var ranges []OffsetRange
|
|
|
|
var offset int64
|
|
|
|
// avoid the last chunk too few bytes
|
|
|
|
preferSize := info.Size()/int64(chunks) + 1
|
|
|
|
for {
|
|
|
|
if offset+preferSize >= info.Size() {
|
|
|
|
ranges = append(ranges, OffsetRange{
|
|
|
|
File: filename,
|
|
|
|
Start: offset,
|
|
|
|
Stop: info.Size(),
|
|
|
|
})
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
offsetRange, err := nextRange(file, offset, offset+preferSize)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ranges = append(ranges, offsetRange)
|
|
|
|
if offsetRange.Stop < info.Size() {
|
|
|
|
offset = offsetRange.Stop
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ranges, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func nextRange(file *os.File, start, stop int64) (OffsetRange, error) {
|
|
|
|
offset, err := skipPartialLine(file, stop)
|
|
|
|
if err != nil {
|
|
|
|
return OffsetRange{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return OffsetRange{
|
|
|
|
File: file.Name(),
|
|
|
|
Start: start,
|
|
|
|
Stop: offset,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func skipPartialLine(file *os.File, offset int64) (int64, error) {
|
|
|
|
for {
|
|
|
|
skipBuf := make([]byte, bufSize)
|
|
|
|
n, err := file.ReadAt(skipBuf, offset)
|
|
|
|
if err != nil && err != io.EOF {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if n == 0 {
|
|
|
|
return 0, io.EOF
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i < n; i++ {
|
|
|
|
if skipBuf[i] != '\r' && skipBuf[i] != '\n' {
|
|
|
|
offset++
|
|
|
|
} else {
|
|
|
|
for ; i < n; i++ {
|
|
|
|
if skipBuf[i] == '\r' || skipBuf[i] == '\n' {
|
|
|
|
offset++
|
|
|
|
} else {
|
|
|
|
return offset, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return offset, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|