1
0
Fork 0

Lexer reads runes from buffer

This commit is contained in:
Gregory Eremin 2015-02-18 20:26:53 +07:00
parent 9b787b749b
commit 6b9a8bebbc
2 changed files with 36 additions and 33 deletions

View File

@ -5,21 +5,20 @@ package lexer
import ( import (
"fmt" "fmt"
"strings"
"unicode/utf8" "github.com/localhots/punk/buffer"
) )
type ( type (
// Holds the state of the scanner // Holds the state of the scanner
Lexer struct { Lexer struct {
input string // The string being scanned input buffer.Bufferer
stack []rune
pos int
lineNum int // Line number lineNum int // Line number
colNum int // Column number colNum int // Column number
pos int // Current position in the input
start int // Start position of this item
startLine int // Start line of this item startLine int // Start line of this item
startCol int // Start column of this item startCol int // Start column of this item
width int // Width of last rune read from input
items chan Item // Channel of scanned items items chan Item // Channel of scanned items
} }
@ -27,7 +26,6 @@ type (
Item struct { Item struct {
Token Token // The type of this item Token Token // The type of this item
Val string // The value of this item Val string // The value of this item
Pos int // The starting position, in bytes, of this item in the input string
Line int // Line number Line int // Line number
Column int // Column number Column int // Column number
} }
@ -61,7 +59,7 @@ const (
) )
// Creates a new scanner for the input string // Creates a new scanner for the input string
func New(input string) *Lexer { func New(input buffer.Bufferer) *Lexer {
return &Lexer{ return &Lexer{
input: input, input: input,
items: make(chan Item), items: make(chan Item),
@ -86,14 +84,12 @@ func (l *Lexer) NextItem() (item Item, ok bool) {
// Returns the next rune in the input // Returns the next rune in the input
func (l *Lexer) next() rune { func (l *Lexer) next() rune {
if l.pos >= len(l.input) { var r rune
l.width = 0 if l.pos > len(l.stack)-1 {
return 0 l.stack = append(l.stack, l.input.Next())
} }
r = l.stack[l.pos]
r, w := utf8.DecodeRuneInString(l.input[l.pos:]) l.pos++
l.width = w
l.pos += l.width
// Counting lines and columns - token coordinates // Counting lines and columns - token coordinates
if r == '\n' { if r == '\n' {
@ -108,36 +104,43 @@ func (l *Lexer) next() rune {
// Returns the value for the next token // Returns the value for the next token
func (l *Lexer) val() string { func (l *Lexer) val() string {
return l.input[l.start:l.pos] return string(l.stack[:l.pos])
} }
// Returns but does not consume the next rune in the input // Returns but does not consume the next rune in the input
func (l *Lexer) peek() rune { func (l *Lexer) peek() rune {
r := l.next() r := l.next()
l.backup() l.backup(1)
return r return r
} }
// Tells if the following input matches the given string // Tells if the following input matches the given string
func (l *Lexer) acceptString(s string) (ok bool) { func (l *Lexer) acceptString(s string) (ok bool) {
if strings.HasPrefix(l.input[l.pos:], s) { for i, c := range s {
l.pos += len(s) if l.next() != c {
return true l.backup(i + 1)
return false
}
} }
return false return true
} }
// Steps back one rune // Steps back one rune
// Backup is never called right after a new line char so we don't care // Backup is never called right after a new line char so we don't care
// about the line number. This is also true for the ignore function // about the line number. This is also true for the ignore function
func (l *Lexer) backup() { func (l *Lexer) backup(n int) {
l.pos -= l.width l.pos -= n
l.colNum-- l.colNum -= n
} }
// Skips over the pending input before this point // Skips over the pending input before this point
func (l *Lexer) ignore() { func (l *Lexer) ignore() {
l.start = l.pos if l.pos < len(l.stack) {
l.stack = l.stack[l.pos:]
} else {
l.stack = []rune{}
}
l.pos = 0
l.startLine = l.lineNum l.startLine = l.lineNum
l.startCol = l.colNum l.startCol = l.colNum
} }
@ -151,7 +154,6 @@ func (l *Lexer) emit(t Token) {
l.items <- Item{ l.items <- Item{
Token: t, Token: t,
Val: l.val(), Val: l.val(),
Pos: l.start,
Line: l.startLine, Line: l.startLine,
Column: l.startCol, Column: l.startCol,
} }
@ -163,7 +165,6 @@ func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
l.items <- Item{ l.items <- Item{
Token: Error, Token: Error,
Val: fmt.Sprintf(format, args...), Val: fmt.Sprintf(format, args...),
Pos: l.start,
Line: l.startLine, Line: l.startLine,
Column: l.startCol, Column: l.startCol,
} }
@ -180,13 +181,13 @@ func lexInitial(l *Lexer) stateFn {
case ' ', '\t', '\n': case ' ', '\t', '\n':
l.ignore() l.ignore()
case 'n': case 'n':
l.backup() l.backup(1)
return lexNull(l) return lexNull(l)
case 't', 'f': case 't', 'f':
l.backup() l.backup(1)
return lexBool(l) return lexBool(l)
case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0': case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0':
l.backup() l.backup(1)
return lexNumber(l) return lexNumber(l)
case '"': case '"':
return lexString(l) return lexString(l)
@ -237,7 +238,7 @@ func lexNumber(l *Lexer) stateFn {
case '.': case '.':
numDots++ numDots++
default: default:
l.backup() l.backup(1)
if numDots > 1 || r == '.' { if numDots > 1 || r == '.' {
return l.errorf("Invalid number: %q", l.val()) return l.errorf("Invalid number: %q", l.val())
} }
@ -260,7 +261,7 @@ func lexString(l *Lexer) stateFn {
escaped = false escaped = false
} else { } else {
// Going before closing quote and emitting // Going before closing quote and emitting
l.backup() l.backup(1)
l.emit(String) l.emit(String)
// Skipping closing quote // Skipping closing quote
l.next() l.next()

View File

@ -5,6 +5,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/localhots/punk/buffer"
"github.com/localhots/punk/lexer" "github.com/localhots/punk/lexer"
) )
@ -20,8 +21,9 @@ type (
// Creates a new parser // Creates a new parser
func New(b []byte, sels []string) *Parser { func New(b []byte, sels []string) *Parser {
buf := buffer.NewDataBuffer(b)
return &Parser{ return &Parser{
lex: lexer.New(string(b)), lex: lexer.New(buf),
ctx: &context{ ctx: &context{
exps: []expectation{}, exps: []expectation{},
}, },