JSON lexer
This commit is contained in:
		
							parent
							
								
									9c6f27bcc5
								
							
						
					
					
						commit
						0f04fa41e8
					
				
							
								
								
									
										10
									
								
								lex.go
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								lex.go
									
									
									
									
									
								
							@ -1,6 +1,7 @@
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"os"
 | 
			
		||||
 | 
			
		||||
@ -12,5 +13,12 @@ func main() {
 | 
			
		||||
	b, _ := ioutil.ReadAll(f)
 | 
			
		||||
 | 
			
		||||
	lex := lexer.New("foo", string(b))
 | 
			
		||||
	lex.Run()
 | 
			
		||||
	go lex.Run()
 | 
			
		||||
	for {
 | 
			
		||||
		i := lex.NextItem()
 | 
			
		||||
		fmt.Println(i)
 | 
			
		||||
		if i.String() == "EOF" {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										115
									
								
								lexer/lexer.go
									
									
									
									
									
								
							
							
						
						
									
										115
									
								
								lexer/lexer.go
									
									
									
									
									
								
							@ -11,22 +11,22 @@ type (
 | 
			
		||||
	Lexer struct {
 | 
			
		||||
		input      string    // the string being scanned
 | 
			
		||||
		state      stateFn   // the next lexing function to enter
 | 
			
		||||
		pos        Pos       // current position in the input
 | 
			
		||||
		start      Pos       // start position of this item
 | 
			
		||||
		width      Pos       // width of last rune read from input
 | 
			
		||||
		lastPos    Pos       // position of most recent item returned by nextItem
 | 
			
		||||
		items      chan item // channel of scanned items
 | 
			
		||||
		lineNum    int       // Line number
 | 
			
		||||
		pos        int       // current position in the input
 | 
			
		||||
		start      int       // start position of this item
 | 
			
		||||
		width      int       // width of last rune read from input
 | 
			
		||||
		lastPos    int       // position of most recent item returned by nextItem
 | 
			
		||||
		items      chan Item // channel of scanned items
 | 
			
		||||
		parenDepth int       // nesting depth of ( ) exprs
 | 
			
		||||
	}
 | 
			
		||||
	Pos int
 | 
			
		||||
 | 
			
		||||
	// stateFn represents the state of the scanner as a function that returns the next state.
 | 
			
		||||
	stateFn func(*Lexer) stateFn
 | 
			
		||||
 | 
			
		||||
	// item represents a token or text string returned from the scanner.
 | 
			
		||||
	item struct {
 | 
			
		||||
	Item struct {
 | 
			
		||||
		typ itemType // The type of this item.
 | 
			
		||||
		pos Pos      // The starting position, in bytes, of this item in the input string.
 | 
			
		||||
		pos int      // The starting position, in bytes, of this item in the input string.
 | 
			
		||||
		val string   // The value of this item.
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -38,7 +38,6 @@ const (
 | 
			
		||||
	// Special
 | 
			
		||||
	itemError itemType = iota // error occurred; value is text of error
 | 
			
		||||
	itemEOF
 | 
			
		||||
	itemSpace
 | 
			
		||||
 | 
			
		||||
	// Symbols
 | 
			
		||||
	itemBraceOpen    // {
 | 
			
		||||
@ -54,27 +53,17 @@ const (
 | 
			
		||||
	itemBool   // true, false
 | 
			
		||||
	itemNumber // 0, 2.5
 | 
			
		||||
	itemString // "foo"
 | 
			
		||||
	itemArray  // [1, 2, 3]
 | 
			
		||||
	itemObject // {"a": 1, "b": 2}
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	EOF = -1
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
	itemMap = map[string]itemType{
 | 
			
		||||
		"null":  itemNull,
 | 
			
		||||
		"true":  itemBool,
 | 
			
		||||
		"false": itemBool,
 | 
			
		||||
	}
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// lex creates a new scanner for the input string.
 | 
			
		||||
func New(name, input string) *Lexer {
 | 
			
		||||
	l := &Lexer{
 | 
			
		||||
		input: input,
 | 
			
		||||
		items: make(chan item),
 | 
			
		||||
		items: make(chan Item),
 | 
			
		||||
	}
 | 
			
		||||
	return l
 | 
			
		||||
}
 | 
			
		||||
@ -86,20 +75,47 @@ func (l *Lexer) Run() {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (l *Lexer) NextItem() Item {
 | 
			
		||||
	item := <-l.items
 | 
			
		||||
	l.lastPos = item.pos
 | 
			
		||||
	return item
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// Lexer stuff
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
func (i item) String() string {
 | 
			
		||||
	switch {
 | 
			
		||||
	case i.typ == itemEOF:
 | 
			
		||||
func (i Item) String() string {
 | 
			
		||||
	switch i.typ {
 | 
			
		||||
	case itemEOF:
 | 
			
		||||
		return "EOF"
 | 
			
		||||
	case i.typ == itemError:
 | 
			
		||||
		return i.val
 | 
			
		||||
	case len(i.val) > 10:
 | 
			
		||||
		return fmt.Sprintf("%.10q...", i.val)
 | 
			
		||||
	case itemError:
 | 
			
		||||
		return "Error: " + i.val
 | 
			
		||||
	case itemBraceOpen:
 | 
			
		||||
		return "{"
 | 
			
		||||
	case itemBraceClose:
 | 
			
		||||
		return "}"
 | 
			
		||||
	case itemBracketOpen:
 | 
			
		||||
		return "["
 | 
			
		||||
	case itemBracketClose:
 | 
			
		||||
		return "]"
 | 
			
		||||
	case itemQuote:
 | 
			
		||||
		return "\""
 | 
			
		||||
	case itemColon:
 | 
			
		||||
		return ":"
 | 
			
		||||
	case itemComma:
 | 
			
		||||
		return ","
 | 
			
		||||
	case itemNull:
 | 
			
		||||
		return "NULL"
 | 
			
		||||
	case itemBool:
 | 
			
		||||
		return "Bool: " + i.val
 | 
			
		||||
	case itemNumber:
 | 
			
		||||
		return "Number: " + i.val
 | 
			
		||||
	case itemString:
 | 
			
		||||
		return "String: " + i.val
 | 
			
		||||
	default:
 | 
			
		||||
		panic("Unreachable")
 | 
			
		||||
	}
 | 
			
		||||
	return fmt.Sprintf("%q", i.val)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// next returns the next rune in the input.
 | 
			
		||||
@ -109,7 +125,7 @@ func (l *Lexer) next() rune {
 | 
			
		||||
		return EOF
 | 
			
		||||
	}
 | 
			
		||||
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 | 
			
		||||
	l.width = Pos(w)
 | 
			
		||||
	l.width = w
 | 
			
		||||
	l.pos += l.width
 | 
			
		||||
	return r
 | 
			
		||||
}
 | 
			
		||||
@ -128,7 +144,7 @@ func (l *Lexer) backup() {
 | 
			
		||||
 | 
			
		||||
// emit passes an item back to the client.
 | 
			
		||||
func (l *Lexer) emit(t itemType) {
 | 
			
		||||
	l.items <- item{t, l.start, l.input[l.start:l.pos]}
 | 
			
		||||
	l.items <- Item{t, l.start, l.input[l.start:l.pos]}
 | 
			
		||||
	l.start = l.pos
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -137,48 +153,17 @@ func (l *Lexer) ignore() {
 | 
			
		||||
	l.start = l.pos
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// accept consumes the next rune if it's from the valid set.
 | 
			
		||||
func (l *Lexer) accept(valid string) bool {
 | 
			
		||||
	if strings.IndexRune(valid, l.next()) >= 0 {
 | 
			
		||||
func (l *Lexer) acceptString(s string) (ok bool) {
 | 
			
		||||
	if strings.HasPrefix(l.input[l.pos:], s) {
 | 
			
		||||
		l.pos += len(s)
 | 
			
		||||
		return true
 | 
			
		||||
	}
 | 
			
		||||
	l.backup()
 | 
			
		||||
	return false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// acceptRun consumes a run of runes from the valid set.
 | 
			
		||||
func (l *Lexer) acceptRun(valid string) {
 | 
			
		||||
	for strings.IndexRune(valid, l.next()) >= 0 {
 | 
			
		||||
	}
 | 
			
		||||
	l.backup()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// lineNumber reports which line we're on, based on the position of
 | 
			
		||||
// the previous item returned by nextItem. Doing it this way
 | 
			
		||||
// means we don't have to worry about peek double counting.
 | 
			
		||||
func (l *Lexer) lineNumber() int {
 | 
			
		||||
	return 1 + strings.Count(l.input[:l.lastPos], "\n")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// errorf returns an error token and terminates the scan by passing
 | 
			
		||||
// back a nil pointer that will be the next state, terminating l.nextItem.
 | 
			
		||||
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
 | 
			
		||||
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
 | 
			
		||||
	l.items <- Item{itemError, l.start, fmt.Sprintf(format, args...)}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// nextItem returns the next item from the input.
 | 
			
		||||
func (l *Lexer) nextItem() item {
 | 
			
		||||
	item := <-l.items
 | 
			
		||||
	l.lastPos = item.pos
 | 
			
		||||
	return item
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// Helpers
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
// isSpace reports whether r is a space character.
 | 
			
		||||
func isSpace(r rune) bool {
 | 
			
		||||
	return r == ' ' || r == '\t'
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										127
									
								
								lexer/state.go
									
									
									
									
									
								
							
							
						
						
									
										127
									
								
								lexer/state.go
									
									
									
									
									
								
							@ -1,12 +1,42 @@
 | 
			
		||||
package lexer
 | 
			
		||||
 | 
			
		||||
import "strings"
 | 
			
		||||
 | 
			
		||||
func lexInitial(l *Lexer) stateFn {
 | 
			
		||||
loop:
 | 
			
		||||
	for {
 | 
			
		||||
		switch l.next() {
 | 
			
		||||
		switch r := l.next(); r {
 | 
			
		||||
		case ' ', '\t':
 | 
			
		||||
			return lexSpace(l)
 | 
			
		||||
		case '\n':
 | 
			
		||||
			l.lineNum++
 | 
			
		||||
		case 'n':
 | 
			
		||||
			l.backup()
 | 
			
		||||
			return lexNull(l)
 | 
			
		||||
		case 't', 'f':
 | 
			
		||||
			l.backup()
 | 
			
		||||
			return lexBool(l)
 | 
			
		||||
		case '1', '2', '3', '4', '5', '6', '7', '8', '9', '0':
 | 
			
		||||
			l.backup()
 | 
			
		||||
			return lexNumber(l)
 | 
			
		||||
		case '"':
 | 
			
		||||
			return lexString(l)
 | 
			
		||||
		case '[':
 | 
			
		||||
			l.emit(itemBracketOpen)
 | 
			
		||||
		case ']':
 | 
			
		||||
			l.emit(itemBracketClose)
 | 
			
		||||
		case '{':
 | 
			
		||||
			l.emit(itemBraceOpen)
 | 
			
		||||
		case '}':
 | 
			
		||||
			l.emit(itemBraceClose)
 | 
			
		||||
		case ':':
 | 
			
		||||
			l.emit(itemColon)
 | 
			
		||||
		case ',':
 | 
			
		||||
			l.emit(itemComma)
 | 
			
		||||
		case EOF:
 | 
			
		||||
			break
 | 
			
		||||
			break loop
 | 
			
		||||
		default:
 | 
			
		||||
			panic("Unexpected symbol!")
 | 
			
		||||
			panic("Unexpected symbol: " + string(r))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -16,28 +46,81 @@ func lexInitial(l *Lexer) stateFn {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexNumber(l *Lexer) stateFn {
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexString(l *Lexer) stateFn {
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexArray(l *Lexer) stateFn {
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexObject(l *Lexer) stateFn {
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// lexSpace scans a run of space characters.
 | 
			
		||||
// One space has already been seen.
 | 
			
		||||
// Skip all spaces
 | 
			
		||||
// One space has already been seen
 | 
			
		||||
func lexSpace(l *Lexer) stateFn {
 | 
			
		||||
	for isSpace(l.peek()) {
 | 
			
		||||
		l.next()
 | 
			
		||||
	}
 | 
			
		||||
	l.emit(itemSpace)
 | 
			
		||||
	l.ignore()
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexNull(l *Lexer) stateFn {
 | 
			
		||||
	if l.acceptString("null") {
 | 
			
		||||
		l.emit(itemNull)
 | 
			
		||||
	} else {
 | 
			
		||||
		return l.errorf("Unexpected token")
 | 
			
		||||
	}
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexBool(l *Lexer) stateFn {
 | 
			
		||||
	if l.acceptString("true") || l.acceptString("false") {
 | 
			
		||||
		l.emit(itemBool)
 | 
			
		||||
	}
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexNumber(l *Lexer) stateFn {
 | 
			
		||||
	hasDot := false
 | 
			
		||||
	for {
 | 
			
		||||
		if r := l.peek(); isDigit(r) {
 | 
			
		||||
			l.next()
 | 
			
		||||
		} else if r == '.' {
 | 
			
		||||
			if hasDot {
 | 
			
		||||
				return l.errorf("Invalid number")
 | 
			
		||||
			} else {
 | 
			
		||||
				hasDot = true
 | 
			
		||||
				l.next()
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	l.emit(itemNumber)
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func lexString(l *Lexer) stateFn {
 | 
			
		||||
	escaped := false
 | 
			
		||||
loop:
 | 
			
		||||
	for {
 | 
			
		||||
		switch r := l.next(); r {
 | 
			
		||||
		case '\\':
 | 
			
		||||
			escaped = true
 | 
			
		||||
		case '"':
 | 
			
		||||
			if escaped {
 | 
			
		||||
				escaped = false
 | 
			
		||||
			} else {
 | 
			
		||||
				l.emit(itemString)
 | 
			
		||||
				break loop
 | 
			
		||||
			}
 | 
			
		||||
		case EOF:
 | 
			
		||||
			return l.errorf("String hits EOF")
 | 
			
		||||
		default:
 | 
			
		||||
			escaped = false
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return lexInitial
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func isSpace(r rune) bool {
 | 
			
		||||
	return r == ' ' || r == '\t'
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func isDigit(r rune) bool {
 | 
			
		||||
	return strings.IndexRune("1234567890", r) > -1
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user