You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
673 lines
14 KiB
Go
673 lines
14 KiB
Go
package scanner
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/zeromicro/go-zero/tools/goctl/pkg/parser/api/token"
|
|
"github.com/zeromicro/go-zero/tools/goctl/util/pathx"
|
|
)
|
|
|
|
// mode is a state of the small state machines used to scan block comments
// (documents) and string literals.
type mode int

const (
	// initMode is the state before any delimiter has been consumed.
	initMode mode = iota

	// Block-comment states, in the order they are reached while scanning
	// "/* ... */".
	documentHalfOpen  // the leading '/' has been read
	documentOpen      // "/*" has been read
	documentHalfClose // a '*' that may start the closing "*/" has been read
	documentClose     // the closing "*/" has been read

	// String-literal states.
	stringOpen  // the opening delimiter has been read
	stringClose // the closing delimiter has been read
)

// missingInput is the error NewScanner returns for empty input.
var missingInput = errors.New("missing input")
|
|
|
|
// Scanner is a lexical scanner.
//
// It walks the input one rune at a time; NextToken produces the tokens.
type Scanner struct {
	filename string // file name reported in token positions
	size     int    // number of runes in data

	data         []rune // the whole input, decoded into runes
	position     int    // current position in input (points to current char)
	readPosition int    // current reading position in input (after current char)
	ch           rune   // rune at position, or 0 once the input is exhausted

	// lines holds the offsets of the newlines consumed so far while skipping
	// whitespace; NewScanner seeds it with -1 for the first line.
	lines []int
}
|
|
|
|
// NextToken returns the next token.
|
|
func (s *Scanner) NextToken() (token.Token, error) {
|
|
s.skipWhiteSpace()
|
|
switch s.ch {
|
|
case '/':
|
|
peekOne := s.peekRune()
|
|
switch peekOne {
|
|
case '/':
|
|
return s.scanLineComment(), nil
|
|
case '*':
|
|
return s.scanDocument()
|
|
default:
|
|
return s.newToken(token.QUO), nil
|
|
}
|
|
case '-':
|
|
return s.newToken(token.SUB), nil
|
|
case '*':
|
|
return s.newToken(token.MUL), nil
|
|
case '(':
|
|
return s.newToken(token.LPAREN), nil
|
|
case '[':
|
|
return s.newToken(token.LBRACK), nil
|
|
case '{':
|
|
return s.newToken(token.LBRACE), nil
|
|
case ',':
|
|
return s.newToken(token.COMMA), nil
|
|
case '.':
|
|
position := s.position
|
|
peekOne := s.peekRune()
|
|
if peekOne != '.' {
|
|
return s.newToken(token.DOT), nil
|
|
}
|
|
s.readRune()
|
|
peekOne = s.peekRune()
|
|
if peekOne != '.' {
|
|
return s.newToken(token.DOT), nil
|
|
}
|
|
s.readRune()
|
|
s.readRune()
|
|
return token.Token{
|
|
Type: token.ELLIPSIS,
|
|
Text: "...",
|
|
Position: s.newPosition(position),
|
|
}, nil
|
|
case ')':
|
|
return s.newToken(token.RPAREN), nil
|
|
case ']':
|
|
return s.newToken(token.RBRACK), nil
|
|
case '}':
|
|
return s.newToken(token.RBRACE), nil
|
|
case ';':
|
|
return s.newToken(token.SEMICOLON), nil
|
|
case ':':
|
|
return s.newToken(token.COLON), nil
|
|
case '=':
|
|
return s.newToken(token.ASSIGN), nil
|
|
case '@':
|
|
return s.scanAt()
|
|
case '"':
|
|
return s.scanString('"', token.STRING)
|
|
case '`':
|
|
return s.scanString('`', token.RAW_STRING)
|
|
case 0:
|
|
return token.EofToken, nil
|
|
default:
|
|
if s.isIdentifierLetter(s.ch) {
|
|
return s.scanIdent(), nil
|
|
}
|
|
if s.isDigit(s.ch) {
|
|
return s.scanIntOrDuration(), nil
|
|
}
|
|
tok := token.NewIllegalToken(s.ch, s.newPosition(s.position))
|
|
s.readRune()
|
|
return tok, nil
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) newToken(tp token.Type) token.Token {
|
|
tok := token.Token{
|
|
Type: tp,
|
|
Text: string(s.ch),
|
|
Position: s.positionAt(),
|
|
}
|
|
s.readRune()
|
|
return tok
|
|
}
|
|
|
|
func (s *Scanner) readRune() {
|
|
if s.readPosition >= s.size {
|
|
s.ch = 0
|
|
} else {
|
|
s.ch = s.data[s.readPosition]
|
|
}
|
|
s.position = s.readPosition
|
|
s.readPosition += 1
|
|
}
|
|
|
|
func (s *Scanner) peekRune() rune {
|
|
if s.readPosition >= s.size {
|
|
return 0
|
|
}
|
|
return s.data[s.readPosition]
|
|
}
|
|
|
|
func (s *Scanner) scanString(delim rune, tp token.Type) (token.Token, error) {
|
|
position := s.position
|
|
var stringMode = initMode
|
|
for {
|
|
switch s.ch {
|
|
case delim:
|
|
switch stringMode {
|
|
case initMode:
|
|
stringMode = stringOpen
|
|
case stringOpen:
|
|
stringMode = stringClose
|
|
s.readRune()
|
|
return token.Token{
|
|
Type: tp,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}, nil
|
|
}
|
|
case 0:
|
|
switch stringMode {
|
|
case initMode: // assert: dead code
|
|
return token.ErrorToken, s.assertExpected(token.EOF, tp)
|
|
case stringOpen:
|
|
return token.ErrorToken, s.assertExpectedString(token.EOF.String(), string(delim))
|
|
case stringClose: // assert: dead code
|
|
return token.Token{
|
|
Type: tp,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}, nil
|
|
}
|
|
}
|
|
s.readRune()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanAt() (token.Token, error) {
|
|
position := s.position
|
|
peek := s.peekRune()
|
|
if !s.isLetter(peek) {
|
|
if peek == 0 {
|
|
return token.NewIllegalToken(s.ch, s.positionAt()), nil
|
|
}
|
|
return token.ErrorToken, s.assertExpectedString(string(peek), token.IDENT.String())
|
|
}
|
|
|
|
s.readRune()
|
|
letters := s.scanLetterSet()
|
|
switch letters {
|
|
case "handler":
|
|
return token.Token{
|
|
Type: token.AT_HANDLER,
|
|
Text: "@handler",
|
|
Position: s.newPosition(position),
|
|
}, nil
|
|
case "server":
|
|
return token.Token{
|
|
Type: token.AT_SERVER,
|
|
Text: "@server",
|
|
Position: s.newPosition(position),
|
|
}, nil
|
|
case "doc":
|
|
return token.Token{
|
|
Type: token.AT_DOC,
|
|
Text: "@doc",
|
|
Position: s.newPosition(position),
|
|
}, nil
|
|
default:
|
|
|
|
return token.ErrorToken, s.assertExpectedString(
|
|
"@"+letters,
|
|
token.AT_DOC.String(),
|
|
token.AT_HANDLER.String(),
|
|
token.AT_SERVER.String())
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanIntOrDuration() token.Token {
|
|
position := s.position
|
|
for s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
switch s.ch {
|
|
case 'n', 'µ', 'm', 's', 'h':
|
|
return s.scanDuration(position)
|
|
default:
|
|
return token.Token{
|
|
Type: token.INT,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}
|
|
}
|
|
}
|
|
|
|
// scanDuration scans a duration literal, for example "1ns", "1µs", "1ms", "1s", "1m", "1h".
|
|
func (s *Scanner) scanDuration(bgPos int) token.Token {
|
|
switch s.ch {
|
|
case 'n':
|
|
return s.scanNanosecond(bgPos)
|
|
case 'µ':
|
|
return s.scanMicrosecond(bgPos)
|
|
case 'm':
|
|
return s.scanMillisecondOrMinute(bgPos)
|
|
case 's':
|
|
return s.scanSecond(bgPos)
|
|
case 'h':
|
|
return s.scanHour(bgPos)
|
|
default:
|
|
return s.illegalToken()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanNanosecond(bgPos int) token.Token {
|
|
s.readRune()
|
|
if s.ch != 's' {
|
|
return s.illegalToken()
|
|
}
|
|
s.readRune()
|
|
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanMicrosecond(bgPos int) token.Token {
|
|
s.readRune()
|
|
if s.ch != 's' {
|
|
return s.illegalToken()
|
|
}
|
|
|
|
s.readRune()
|
|
if !s.isDigit(s.ch) {
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
for s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
|
|
if s.ch != 'n' {
|
|
return s.illegalToken()
|
|
}
|
|
|
|
return s.scanNanosecond(bgPos)
|
|
|
|
}
|
|
|
|
func (s *Scanner) scanMillisecondOrMinute(bgPos int) token.Token {
|
|
s.readRune()
|
|
if s.ch != 's' { // minute
|
|
if s.ch == 0 || !s.isDigit(s.ch) {
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
return s.scanMinute(bgPos)
|
|
}
|
|
|
|
return s.scanMillisecond(bgPos)
|
|
}
|
|
|
|
func (s *Scanner) scanMillisecond(bgPos int) token.Token {
|
|
s.readRune()
|
|
if !s.isDigit(s.ch) {
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
for s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
|
|
switch s.ch {
|
|
case 'n':
|
|
return s.scanNanosecond(bgPos)
|
|
case 'µ':
|
|
return s.scanMicrosecond(bgPos)
|
|
default:
|
|
return s.illegalToken()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanSecond(bgPos int) token.Token {
|
|
s.readRune()
|
|
if !s.isDigit(s.ch) {
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
for s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
|
|
switch s.ch {
|
|
case 'n':
|
|
return s.scanNanosecond(bgPos)
|
|
case 'µ':
|
|
return s.scanMicrosecond(bgPos)
|
|
case 'm':
|
|
s.readRune()
|
|
if s.ch != 's' {
|
|
return s.illegalToken()
|
|
}
|
|
return s.scanMillisecond(bgPos)
|
|
default:
|
|
return s.illegalToken()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanMinute(bgPos int) token.Token {
|
|
if !s.isDigit(s.ch) {
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
for s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
|
|
switch s.ch {
|
|
case 'n':
|
|
return s.scanNanosecond(bgPos)
|
|
case 'µ':
|
|
return s.scanMicrosecond(bgPos)
|
|
case 'm':
|
|
s.readRune()
|
|
if s.ch != 's' {
|
|
return s.illegalToken()
|
|
}
|
|
return s.scanMillisecond(bgPos)
|
|
case 's':
|
|
return s.scanSecond(bgPos)
|
|
default:
|
|
return s.illegalToken()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanHour(bgPos int) token.Token {
|
|
s.readRune()
|
|
if !s.isDigit(s.ch) {
|
|
return token.Token{
|
|
Type: token.DURATION,
|
|
Text: string(s.data[bgPos:s.position]),
|
|
Position: s.newPosition(bgPos),
|
|
}
|
|
}
|
|
|
|
for s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
|
|
switch s.ch {
|
|
case 'n':
|
|
return s.scanNanosecond(bgPos)
|
|
case 'µ':
|
|
return s.scanMicrosecond(bgPos)
|
|
case 'm':
|
|
return s.scanMillisecondOrMinute(bgPos)
|
|
case 's':
|
|
return s.scanSecond(bgPos)
|
|
default:
|
|
return s.illegalToken()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) illegalToken() token.Token {
|
|
tok := token.NewIllegalToken(s.ch, s.newPosition(s.position))
|
|
s.readRune()
|
|
return tok
|
|
}
|
|
|
|
func (s *Scanner) scanIdent() token.Token {
|
|
position := s.position
|
|
for s.isIdentifierLetter(s.ch) || s.isDigit(s.ch) {
|
|
s.readRune()
|
|
}
|
|
|
|
ident := string(s.data[position:s.position])
|
|
|
|
if s.ch == ':' {
|
|
s.readRune()
|
|
return token.Token{
|
|
Type: token.KEY,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}
|
|
}
|
|
|
|
if ident == "interface" && s.ch == '{' && s.peekRune() == '}' {
|
|
s.readRune()
|
|
s.readRune()
|
|
return token.Token{
|
|
Type: token.ANY,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}
|
|
}
|
|
|
|
return token.Token{
|
|
Type: token.IDENT,
|
|
Text: ident,
|
|
Position: s.newPosition(position),
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanLetterSet() string {
|
|
position := s.position
|
|
for s.isLetter(s.ch) {
|
|
s.readRune()
|
|
}
|
|
return string(s.data[position:s.position])
|
|
}
|
|
|
|
func (s *Scanner) scanLineComment() token.Token {
|
|
position := s.position
|
|
for s.ch != '\n' && s.ch != 0 {
|
|
s.readRune()
|
|
}
|
|
|
|
return token.Token{
|
|
Type: token.COMMENT,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) scanDocument() (token.Token, error) {
|
|
position := s.position
|
|
var documentMode = initMode
|
|
for {
|
|
switch s.ch {
|
|
case '*':
|
|
switch documentMode {
|
|
case documentHalfOpen:
|
|
documentMode = documentOpen // /*
|
|
case documentOpen, documentHalfClose:
|
|
documentMode = documentHalfClose // (?m)\/\*\*+
|
|
}
|
|
|
|
case 0:
|
|
switch documentMode {
|
|
case initMode, documentHalfOpen: // assert: dead code
|
|
return token.ErrorToken, s.assertExpected(token.EOF, token.MUL)
|
|
case documentOpen:
|
|
return token.ErrorToken, s.assertExpected(token.EOF, token.MUL)
|
|
case documentHalfClose:
|
|
return token.ErrorToken, s.assertExpected(token.EOF, token.QUO)
|
|
}
|
|
case '/':
|
|
switch documentMode {
|
|
case initMode: // /
|
|
documentMode = documentHalfOpen
|
|
case documentHalfOpen: // assert: dead code
|
|
return token.ErrorToken, s.assertExpected(token.QUO, token.MUL)
|
|
case documentHalfClose:
|
|
documentMode = documentClose // /*\*+*/
|
|
s.readRune()
|
|
tok := token.Token{
|
|
Type: token.DOCUMENT,
|
|
Text: string(s.data[position:s.position]),
|
|
Position: s.newPosition(position),
|
|
}
|
|
return tok, nil
|
|
}
|
|
}
|
|
s.readRune()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) assertExpected(actual token.Type, expected ...token.Type) error {
|
|
var expects []string
|
|
for _, v := range expected {
|
|
expects = append(expects, fmt.Sprintf("'%s'", v.String()))
|
|
}
|
|
|
|
text := fmt.Sprint(s.positionAt().String(), " ", fmt.Sprintf(
|
|
"expected %s, got '%s'",
|
|
strings.Join(expects, " | "),
|
|
actual.String(),
|
|
))
|
|
|
|
return errors.New(text)
|
|
}
|
|
|
|
func (s *Scanner) assertExpectedString(actual string, expected ...string) error {
|
|
var expects []string
|
|
for _, v := range expected {
|
|
expects = append(expects, fmt.Sprintf("'%s'", v))
|
|
}
|
|
|
|
text := fmt.Sprint(s.positionAt().String(), " ", fmt.Sprintf(
|
|
"expected %s, got '%s'",
|
|
strings.Join(expects, " | "),
|
|
actual,
|
|
))
|
|
|
|
return errors.New(text)
|
|
}
|
|
|
|
func (s *Scanner) positionAt() token.Position {
|
|
return s.newPosition(s.position)
|
|
}
|
|
|
|
func (s *Scanner) newPosition(position int) token.Position {
|
|
line := s.lineCount()
|
|
return token.Position{
|
|
Filename: s.filename,
|
|
Line: line,
|
|
Column: position - s.lines[line-1],
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) lineCount() int {
|
|
return len(s.lines)
|
|
}
|
|
|
|
func (s *Scanner) skipWhiteSpace() {
|
|
for s.isWhiteSpace(s.ch) {
|
|
s.readRune()
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) isDigit(b rune) bool {
|
|
return b >= '0' && b <= '9'
|
|
}
|
|
|
|
func (s *Scanner) isLetter(b rune) bool {
|
|
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
|
|
}
|
|
|
|
func (s *Scanner) isIdentifierLetter(b rune) bool {
|
|
if s.isLetter(b) {
|
|
return true
|
|
}
|
|
return b == '_'
|
|
}
|
|
|
|
func (s *Scanner) isWhiteSpace(b rune) bool {
|
|
if b == '\n' {
|
|
s.lines = append(s.lines, s.position)
|
|
}
|
|
return b == ' ' || b == '\t' || b == '\r' || b == '\f' || b == '\v' || b == '\n'
|
|
}
|
|
|
|
// MustNewScanner returns a new scanner for the given filename and data.
|
|
func MustNewScanner(filename string, src interface{}) *Scanner {
|
|
sc, err := NewScanner(filename, src)
|
|
if err != nil {
|
|
log.Fatalln(err)
|
|
}
|
|
return sc
|
|
}
|
|
|
|
// NewScanner returns a new scanner for the given filename and data.
|
|
func NewScanner(filename string, src interface{}) (*Scanner, error) {
|
|
data, err := readData(filename, src)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(data) == 0 {
|
|
return nil, missingInput
|
|
}
|
|
|
|
var runeList []rune
|
|
for _, r := range string(data) {
|
|
runeList = append(runeList, r)
|
|
}
|
|
|
|
filename = filepath.Base(filename)
|
|
s := &Scanner{
|
|
filename: filename,
|
|
size: len(runeList),
|
|
data: runeList,
|
|
lines: []int{-1},
|
|
readPosition: 0,
|
|
}
|
|
|
|
s.readRune()
|
|
return s, nil
|
|
}
|
|
|
|
func readData(filename string, src interface{}) ([]byte, error) {
|
|
if strings.HasSuffix(filename, ".api") && pathx.FileExists(filename) {
|
|
data, err := os.ReadFile(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return data, nil
|
|
}
|
|
|
|
switch v := src.(type) {
|
|
case []byte:
|
|
return v, nil
|
|
case *bytes.Buffer:
|
|
return v.Bytes(), nil
|
|
case string:
|
|
return []byte(v), nil
|
|
default:
|
|
return nil, fmt.Errorf("unsupported type: %T", src)
|
|
}
|
|
}
|