Lexer reads runes from buffer
This commit is contained in:
		
							parent
							
								
									9b787b749b
								
							
						
					
					
						commit
						6b9a8bebbc
					
				@ -5,21 +5,20 @@ package lexer
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"fmt"
 | 
						"fmt"
 | 
				
			||||||
	"strings"
 | 
					
 | 
				
			||||||
	"unicode/utf8"
 | 
						"github.com/localhots/punk/buffer"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type (
 | 
					type (
 | 
				
			||||||
	// Holds the state of the scanner
 | 
						// Holds the state of the scanner
 | 
				
			||||||
	Lexer struct {
 | 
						Lexer struct {
 | 
				
			||||||
		input     string    // The string being scanned
 | 
							input     buffer.Bufferer
 | 
				
			||||||
 | 
							stack     []rune
 | 
				
			||||||
 | 
							pos       int
 | 
				
			||||||
		lineNum   int       // Line number
 | 
							lineNum   int       // Line number
 | 
				
			||||||
		colNum    int       // Column number
 | 
							colNum    int       // Column number
 | 
				
			||||||
		pos       int       // Current position in the input
 | 
					 | 
				
			||||||
		start     int       // Start position of this item
 | 
					 | 
				
			||||||
		startLine int       // Start line of this item
 | 
							startLine int       // Start line of this item
 | 
				
			||||||
		startCol  int       // Start column of this item
 | 
							startCol  int       // Start column of this item
 | 
				
			||||||
		width     int       // Width of last rune read from input
 | 
					 | 
				
			||||||
		items     chan Item // Channel of scanned items
 | 
							items     chan Item // Channel of scanned items
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -27,7 +26,6 @@ type (
 | 
				
			|||||||
	Item struct {
 | 
						Item struct {
 | 
				
			||||||
		Token  Token  // The type of this item
 | 
							Token  Token  // The type of this item
 | 
				
			||||||
		Val    string // The value of this item
 | 
							Val    string // The value of this item
 | 
				
			||||||
		Pos    int    // The starting position, in bytes, of this item in the input string
 | 
					 | 
				
			||||||
		Line   int    // Line number
 | 
							Line   int    // Line number
 | 
				
			||||||
		Column int    // Column number
 | 
							Column int    // Column number
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@ -61,7 +59,7 @@ const (
 | 
				
			|||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Creates a new scanner for the input string
 | 
					// Creates a new scanner that reads runes from the given input buffer
 | 
				
			||||||
func New(input string) *Lexer {
 | 
					func New(input buffer.Bufferer) *Lexer {
 | 
				
			||||||
	return &Lexer{
 | 
						return &Lexer{
 | 
				
			||||||
		input:   input,
 | 
							input:   input,
 | 
				
			||||||
		items:   make(chan Item),
 | 
							items:   make(chan Item),
 | 
				
			||||||
@ -86,14 +84,12 @@ func (l *Lexer) NextItem() (item Item, ok bool) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// Returns the next rune in the input
 | 
					// Returns the next rune in the input
 | 
				
			||||||
func (l *Lexer) next() rune {
 | 
					func (l *Lexer) next() rune {
 | 
				
			||||||
	if l.pos >= len(l.input) {
 | 
						var r rune
 | 
				
			||||||
		l.width = 0
 | 
						if l.pos > len(l.stack)-1 {
 | 
				
			||||||
		return 0
 | 
							l.stack = append(l.stack, l.input.Next())
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						r = l.stack[l.pos]
 | 
				
			||||||
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 | 
						l.pos++
 | 
				
			||||||
	l.width = w
 | 
					 | 
				
			||||||
	l.pos += l.width
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Counting lines and columns - token coordinates
 | 
						// Counting lines and columns - token coordinates
 | 
				
			||||||
	if r == '\n' {
 | 
						if r == '\n' {
 | 
				
			||||||
@ -108,36 +104,43 @@ func (l *Lexer) next() rune {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// Returns the value for the next token
 | 
					// Returns the value for the next token
 | 
				
			||||||
func (l *Lexer) val() string {
 | 
					func (l *Lexer) val() string {
 | 
				
			||||||
	return l.input[l.start:l.pos]
 | 
						return string(l.stack[:l.pos])
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Returns but does not consume the next rune in the input
 | 
					// Returns but does not consume the next rune in the input
 | 
				
			||||||
func (l *Lexer) peek() rune {
 | 
					func (l *Lexer) peek() rune {
 | 
				
			||||||
	r := l.next()
 | 
						r := l.next()
 | 
				
			||||||
	l.backup()
 | 
						l.backup(1)
 | 
				
			||||||
	return r
 | 
						return r
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Tells if the following input matches the given string
 | 
					// Tells if the following input matches the given string
 | 
				
			||||||
func (l *Lexer) acceptString(s string) (ok bool) {
 | 
					func (l *Lexer) acceptString(s string) (ok bool) {
 | 
				
			||||||
	if strings.HasPrefix(l.input[l.pos:], s) {
 | 
						for i, c := range s {
 | 
				
			||||||
		l.pos += len(s)
 | 
							if l.next() != c {
 | 
				
			||||||
		return true
 | 
								l.backup(i + 1)
 | 
				
			||||||
 | 
								return false
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return false
 | 
						return true
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Steps back one rune
 | 
					// Steps back n runes
 | 
				
			||||||
// Backup is never called right after a new line char so we don't care
 | 
					// Backup is never called right after a new line char so we don't care
 | 
				
			||||||
// about the line number. This is also true for the ignore function
 | 
					// about the line number. This is also true for the ignore function
 | 
				
			||||||
func (l *Lexer) backup() {
 | 
					func (l *Lexer) backup(n int) {
 | 
				
			||||||
	l.pos -= l.width
 | 
						l.pos -= n
 | 
				
			||||||
	l.colNum--
 | 
						l.colNum -= n
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Skips over the pending input before this point
 | 
					// Skips over the pending input before this point
 | 
				
			||||||
func (l *Lexer) ignore() {
 | 
					func (l *Lexer) ignore() {
 | 
				
			||||||
	l.start = l.pos
 | 
						if l.pos < len(l.stack) {
 | 
				
			||||||
 | 
							l.stack = l.stack[l.pos:]
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							l.stack = []rune{}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						l.pos = 0
 | 
				
			||||||
	l.startLine = l.lineNum
 | 
						l.startLine = l.lineNum
 | 
				
			||||||
	l.startCol = l.colNum
 | 
						l.startCol = l.colNum
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -151,7 +154,6 @@ func (l *Lexer) emit(t Token) {
 | 
				
			|||||||
	l.items <- Item{
 | 
						l.items <- Item{
 | 
				
			||||||
		Token:  t,
 | 
							Token:  t,
 | 
				
			||||||
		Val:    l.val(),
 | 
							Val:    l.val(),
 | 
				
			||||||
		Pos:    l.start,
 | 
					 | 
				
			||||||
		Line:   l.startLine,
 | 
							Line:   l.startLine,
 | 
				
			||||||
		Column: l.startCol,
 | 
							Column: l.startCol,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@ -163,7 +165,6 @@ func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
 | 
				
			|||||||
	l.items <- Item{
 | 
						l.items <- Item{
 | 
				
			||||||
		Token:  Error,
 | 
							Token:  Error,
 | 
				
			||||||
		Val:    fmt.Sprintf(format, args...),
 | 
							Val:    fmt.Sprintf(format, args...),
 | 
				
			||||||
		Pos:    l.start,
 | 
					 | 
				
			||||||
		Line:   l.startLine,
 | 
							Line:   l.startLine,
 | 
				
			||||||
		Column: l.startCol,
 | 
							Column: l.startCol,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@ -180,13 +181,13 @@ func lexInitial(l *Lexer) stateFn {
 | 
				
			|||||||
		case ' ', '\t', '\n':
 | 
							case ' ', '\t', '\n':
 | 
				
			||||||
			l.ignore()
 | 
								l.ignore()
 | 
				
			||||||
		case 'n':
 | 
							case 'n':
 | 
				
			||||||
			l.backup()
 | 
								l.backup(1)
 | 
				
			||||||
			return lexNull(l)
 | 
								return lexNull(l)
 | 
				
			||||||
		case 't', 'f':
 | 
							case 't', 'f':
 | 
				
			||||||
			l.backup()
 | 
								l.backup(1)
 | 
				
			||||||
			return lexBool(l)
 | 
								return lexBool(l)
 | 
				
			||||||
		case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0':
 | 
							case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0':
 | 
				
			||||||
			l.backup()
 | 
								l.backup(1)
 | 
				
			||||||
			return lexNumber(l)
 | 
								return lexNumber(l)
 | 
				
			||||||
		case '"':
 | 
							case '"':
 | 
				
			||||||
			return lexString(l)
 | 
								return lexString(l)
 | 
				
			||||||
@ -237,7 +238,7 @@ func lexNumber(l *Lexer) stateFn {
 | 
				
			|||||||
		case '.':
 | 
							case '.':
 | 
				
			||||||
			numDots++
 | 
								numDots++
 | 
				
			||||||
		default:
 | 
							default:
 | 
				
			||||||
			l.backup()
 | 
								l.backup(1)
 | 
				
			||||||
			if numDots > 1 || r == '.' {
 | 
								if numDots > 1 || r == '.' {
 | 
				
			||||||
				return l.errorf("Invalid number: %q", l.val())
 | 
									return l.errorf("Invalid number: %q", l.val())
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
@ -260,7 +261,7 @@ func lexString(l *Lexer) stateFn {
 | 
				
			|||||||
				escaped = false
 | 
									escaped = false
 | 
				
			||||||
			} else {
 | 
								} else {
 | 
				
			||||||
				// Going before closing quote and emitting
 | 
									// Going before closing quote and emitting
 | 
				
			||||||
				l.backup()
 | 
									l.backup(1)
 | 
				
			||||||
				l.emit(String)
 | 
									l.emit(String)
 | 
				
			||||||
				// Skipping closing quote
 | 
									// Skipping closing quote
 | 
				
			||||||
				l.next()
 | 
									l.next()
 | 
				
			||||||
 | 
				
			|||||||
@ -5,6 +5,7 @@ import (
 | 
				
			|||||||
	"strconv"
 | 
						"strconv"
 | 
				
			||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"github.com/localhots/punk/buffer"
 | 
				
			||||||
	"github.com/localhots/punk/lexer"
 | 
						"github.com/localhots/punk/lexer"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -20,8 +21,9 @@ type (
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// Creates a new parser
 | 
					// Creates a new parser
 | 
				
			||||||
func New(b []byte, sels []string) *Parser {
 | 
					func New(b []byte, sels []string) *Parser {
 | 
				
			||||||
 | 
						buf := buffer.NewDataBuffer(b)
 | 
				
			||||||
	return &Parser{
 | 
						return &Parser{
 | 
				
			||||||
		lex: lexer.New(string(b)),
 | 
							lex: lexer.New(buf),
 | 
				
			||||||
		ctx: &context{
 | 
							ctx: &context{
 | 
				
			||||||
			exps: []expectation{},
 | 
								exps: []expectation{},
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user