Lexer reads runes from buffer
parent 9b787b749b
commit 6b9a8bebbc
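The change below swaps the lexer's backing store: instead of scanning an in-memory string, it pulls runes on demand from a buffer.Bufferer and keeps the runes it has already read on a small stack so it can still peek, back up, and re-read them. Only Bufferer, Next, and NewDataBuffer are visible in this diff, so the sketch of the buffer API below is an inference, not the package's actual source; in particular the zero-rune-at-end-of-input behaviour and the DataBuffer internals are assumptions.

// Sketch only: inferred from the calls made in this commit;
// the real buffer package may define more than this.
package buffer

// Bufferer is what the lexer now consumes: a source of runes, one at a time.
// Next is assumed to return 0 once the input is exhausted, matching the old
// lexer's end-of-input sentinel.
type Bufferer interface {
    Next() rune
}

// DataBuffer is an assumed in-memory implementation behind NewDataBuffer.
type DataBuffer struct {
    runes []rune
    pos   int
}

// NewDataBuffer wraps a byte slice so it can be handed to lexer.New.
func NewDataBuffer(b []byte) *DataBuffer {
    return &DataBuffer{runes: []rune(string(b))}
}

// Next returns the next rune, or 0 when there is nothing left to read.
func (d *DataBuffer) Next() rune {
    if d.pos >= len(d.runes) {
        return 0
    }
    r := d.runes[d.pos]
    d.pos++
    return r
}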
@@ -5,21 +5,20 @@ package lexer
 
 import (
     "fmt"
-    "strings"
-    "unicode/utf8"
+    "github.com/localhots/punk/buffer"
 )
 
 type (
     // Holds the state of the scanner
     Lexer struct {
-        input     string // The string being scanned
+        input     buffer.Bufferer
+        stack     []rune
+        pos       int
         lineNum   int // Line number
         colNum    int // Column number
-        pos       int // Current position in the input
-        start     int // Start position of this item
         startLine int // Start line of this item
         startCol  int // Start column of this item
-        width     int // Width of last rune read from input
         items     chan Item // Channel of scanned items
     }
 
@@ -27,7 +26,6 @@ type (
     Item struct {
         Token  Token  // The type of this item
         Val    string // The value of this item
-        Pos    int    // The starting position, in bytes, of this item in the input string
         Line   int    // Line number
         Column int    // Column number
     }
@@ -61,7 +59,7 @@ const (
 )
 
 // Creates a new scanner for the input string
-func New(input string) *Lexer {
+func New(input buffer.Bufferer) *Lexer {
     return &Lexer{
         input: input,
         items: make(chan Item),
@@ -86,14 +84,12 @@ func (l *Lexer) NextItem() (item Item, ok bool) {
 
 // Returns the next rune in the input
 func (l *Lexer) next() rune {
-    if l.pos >= len(l.input) {
-        l.width = 0
-        return 0
+    var r rune
+    if l.pos > len(l.stack)-1 {
+        l.stack = append(l.stack, l.input.Next())
     }
-
-    r, w := utf8.DecodeRuneInString(l.input[l.pos:])
-    l.width = w
-    l.pos += l.width
+    r = l.stack[l.pos]
+    l.pos++
 
     // Counting lines and columns - token coordinates
     if r == '\n' {
@@ -108,36 +104,43 @@ func (l *Lexer) next() rune {
 
 // Returns the value for the next token
 func (l *Lexer) val() string {
-    return l.input[l.start:l.pos]
+    return string(l.stack[:l.pos])
 }
 
 // Returns but does not consume the next rune in the input
 func (l *Lexer) peek() rune {
     r := l.next()
-    l.backup()
+    l.backup(1)
     return r
 }
 
 // Tells if the following input matches the given string
 func (l *Lexer) acceptString(s string) (ok bool) {
-    if strings.HasPrefix(l.input[l.pos:], s) {
-        l.pos += len(s)
-        return true
-    }
-
-    return false
+    for i, c := range s {
+        if l.next() != c {
+            l.backup(i + 1)
+            return false
+        }
+    }
+
+    return true
 }
 
 // Steps back one rune
 // Backup is never called right after a new line char so we don't care
 // about the line number. This is also true for the ignore function
-func (l *Lexer) backup() {
-    l.pos -= l.width
-    l.colNum--
+func (l *Lexer) backup(n int) {
+    l.pos -= n
+    l.colNum -= n
 }
 
 // Skips over the pending input before this point
 func (l *Lexer) ignore() {
-    l.start = l.pos
+    if l.pos < len(l.stack) {
+        l.stack = l.stack[l.pos:]
+    } else {
+        l.stack = []rune{}
+    }
+    l.pos = 0
     l.startLine = l.lineNum
     l.startCol = l.colNum
 }
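With reads going through the rune stack, backing up becomes a plain cursor move of n runes, and acceptString can try a literal rune by rune and rewind on a mismatch instead of slicing a string it no longer holds. A minimal standalone illustration of that stack-and-cursor pattern follows; every name in it is invented for the example, and a plain rune slice stands in for the Bufferer.

package main

import "fmt"

// reader mimics the lexer's new bookkeeping: a stack of runes already read
// from the source and a cursor into that stack.
type reader struct {
    src   []rune // stands in for the Bufferer in this sketch
    srcAt int
    stack []rune
    pos   int
}

// next returns the rune at the cursor, pulling one more rune from the
// source when the stack has been exhausted. Zero means end of input.
func (r *reader) next() rune {
    if r.pos > len(r.stack)-1 {
        var c rune
        if r.srcAt < len(r.src) {
            c = r.src[r.srcAt]
            r.srcAt++
        }
        r.stack = append(r.stack, c)
    }
    c := r.stack[r.pos]
    r.pos++
    return c
}

// backup rewinds the cursor by n runes; the runes stay on the stack.
func (r *reader) backup(n int) { r.pos -= n }

// acceptString reports whether the upcoming runes spell out s,
// rewinding everything it consumed when they do not.
func (r *reader) acceptString(s string) bool {
    for i, c := range s {
        if r.next() != c {
            r.backup(i + 1)
            return false
        }
    }
    return true
}

func main() {
    r := &reader{src: []rune("null,")}
    fmt.Println(r.acceptString("nil"))  // false: mismatch at 'u', cursor rewound
    fmt.Println(r.acceptString("null")) // true: the same runes are re-read from the stack
}

Note that the range index i counts bytes, not runes, so backup(i + 1) is only exact for ASCII literals; that holds for the keyword-style tokens suggested by lexNull and lexBool below.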
@@ -151,7 +154,6 @@ func (l *Lexer) emit(t Token) {
     l.items <- Item{
         Token:  t,
         Val:    l.val(),
-        Pos:    l.start,
         Line:   l.startLine,
         Column: l.startCol,
     }
@@ -163,7 +165,6 @@ func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
     l.items <- Item{
         Token:  Error,
         Val:    fmt.Sprintf(format, args...),
-        Pos:    l.start,
         Line:   l.startLine,
         Column: l.startCol,
     }
@@ -180,13 +181,13 @@ func lexInitial(l *Lexer) stateFn {
     case ' ', '\t', '\n':
         l.ignore()
     case 'n':
-        l.backup()
+        l.backup(1)
         return lexNull(l)
     case 't', 'f':
-        l.backup()
+        l.backup(1)
         return lexBool(l)
     case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0':
-        l.backup()
+        l.backup(1)
         return lexNumber(l)
     case '"':
         return lexString(l)
@@ -237,7 +238,7 @@ func lexNumber(l *Lexer) stateFn {
         case '.':
             numDots++
         default:
-            l.backup()
+            l.backup(1)
             if numDots > 1 || r == '.' {
                 return l.errorf("Invalid number: %q", l.val())
             }
@@ -260,7 +261,7 @@ func lexString(l *Lexer) stateFn {
             escaped = false
         } else {
             // Going before closing quote and emitting
-            l.backup()
+            l.backup(1)
             l.emit(String)
             // Skipping closing quote
             l.next()
The parser package picks up the new constructor:

@@ -5,6 +5,7 @@ import (
     "strconv"
     "strings"
 
+    "github.com/localhots/punk/buffer"
     "github.com/localhots/punk/lexer"
 )
@@ -20,8 +21,9 @@ type (
 
 // Creates a new parser
 func New(b []byte, sels []string) *Parser {
+    buf := buffer.NewDataBuffer(b)
     return &Parser{
-        lex: lexer.New(string(b)),
+        lex: lexer.New(buf),
         ctx: &context{
             exps: []expectation{},
         },
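On the parser side the only change is construction: the raw bytes are wrapped in a DataBuffer and handed to lexer.New instead of being converted to a string first. A usage sketch under the same assumptions; the constructors and the NextItem signature come from this diff, while the input literal and the assumption that New starts scanning on its own (with no explicit run step) are mine.

package main

import (
    "fmt"

    "github.com/localhots/punk/buffer"
    "github.com/localhots/punk/lexer"
)

func main() {
    // Wrap the raw bytes in a data buffer, as parser.New now does.
    buf := buffer.NewDataBuffer([]byte(`{"id": 1, "name": "punk"}`))
    lex := lexer.New(buf)

    // NextItem is assumed to yield items until the input runs out,
    // reporting ok == false at that point.
    for {
        item, ok := lex.NextItem()
        if !ok {
            break
        }
        fmt.Printf("%d:%d %v %q\n", item.Line, item.Column, item.Token, item.Val)
    }
}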