Files

105 lines
1.9 KiB
Go

package bleve
import (
	"strings"
	"unicode"
	"unicode/utf8"
)
// Lexer tokenizes the input string.
//
// Tokens are produced one at a time by NextToken: parentheses, double
// quotes, the AND/OR/NOT operators, and plain words. Tokens can be pushed
// back with PeekToken and ReturnToken.
type Lexer struct {
// input is the full text being tokenized.
input string
// pos is the byte offset into input of the next unread character.
pos int
// peekedQueue holds tokens pushed back by PeekToken and ReturnToken.
// NextToken takes from the end of the slice, so the most recently pushed
// token is handed out first.
peekedQueue []Token
}
// NewLexer returns a Lexer positioned at the start of input, with no
// pushed-back tokens.
func NewLexer(input string) *Lexer {
	lx := new(Lexer)
	lx.input = input
	lx.pos = 0
	return lx
}
// peek returns the rune at the current position without consuming it.
// It returns 0 once the whole input has been consumed.
//
// The input is decoded as UTF-8 rather than byte by byte, so the trailing
// bytes of a multi-byte character (for example the 0xA0 byte in "à") are
// never mistaken for stand-alone characters such as U+00A0. A byte that is
// not valid UTF-8 decodes to utf8.RuneError.
func (l *Lexer) peek() rune {
	if l.pos >= len(l.input) {
		return 0
	}
	r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
	return r
}

// advance consumes the rune at the current position and returns it, moving
// pos forward by the rune's encoded width in bytes. At end of input it
// returns 0 and leaves pos unchanged.
//
// An invalid byte is consumed on its own (width 1), so advance always makes
// progress while input remains.
func (l *Lexer) advance() rune {
	if l.pos >= len(l.input) {
		return 0
	}
	r, width := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += width
	return r
}
// skipWhitespace consumes characters for as long as unicode.IsSpace reports
// them as whitespace, leaving pos on the first character that is not (or at
// wherever peek starts returning 0).
func (l *Lexer) skipWhitespace() {
	for {
		ch := l.peek()
		if ch == 0 || !unicode.IsSpace(ch) {
			return
		}
		l.advance()
	}
}
// readWord consumes and returns the run of characters starting at the
// current position. The word ends just before the first whitespace
// character, '(', ')' or '"', or at the end of the input; every other
// character is accepted as part of the word. The result is empty when the
// current character is already one of those delimiters.
func (l *Lexer) readWord() string {
	start := l.pos
	// Bound the loop by position instead of testing peek() against 0: a NUL
	// byte in the input is then read as part of the word rather than being
	// taken for end of input and left unconsumed.
	for l.pos < len(l.input) {
		ch := l.peek()
		if unicode.IsSpace(ch) || ch == '(' || ch == ')' || ch == '"' {
			break
		}
		l.advance()
	}
	return l.input[start:l.pos]
}
// PeekToken returns the next token without consuming it: the token is
// obtained from NextToken and immediately pushed back, so the following
// call to NextToken (or PeekToken) yields the same token again.
func (l *Lexer) PeekToken() Token {
	upcoming := l.NextToken()
	l.ReturnToken(upcoming)
	return upcoming
}
// ReturnToken pushes tok back onto the lexer by appending it to the list of
// pending tokens. NextToken hands out pending tokens from the end of that
// list, so the token returned last is the one read next.
func (l *Lexer) ReturnToken(tok Token) {
	pending := append(l.peekedQueue, tok)
	l.peekedQueue = pending
}
// NextToken returns the next token and consumes it.
//
// Tokens pushed back via PeekToken or ReturnToken are handed out first, most
// recently pushed first. Otherwise leading whitespace is skipped and:
//   - at end of input a TokenEOF token is returned (on every further call too);
//   - '(', ')' and '"' each produce their own single-character token;
//   - anything else is read as a word. The words OR, AND and NOT (compared
//     case-insensitively) and the symbols ||, && and ! become operator
//     tokens; every other word is a TokenWord.
//
// Value always carries the text exactly as it appeared in the input.
func (l *Lexer) NextToken() (tok Token) {
	if n := len(l.peekedQueue); n > 0 {
		tok = l.peekedQueue[n-1]
		l.peekedQueue = l.peekedQueue[:n-1]
		return tok
	}

	l.skipWhitespace()
	if l.pos >= len(l.input) {
		return Token{Type: TokenEOF}
	}

	switch l.peek() {
	case '(':
		l.advance()
		tok = Token{Type: TokenLParen, Value: "("}
	case ')':
		l.advance()
		tok = Token{Type: TokenRParen, Value: ")"}
	case '"':
		l.advance()
		tok = Token{Type: TokenQuote, Value: "\""}
	default:
		text := l.readWord()
		// Start from a plain word and upgrade it when it spells an operator.
		tok = Token{Type: TokenWord, Value: text}
		switch strings.ToUpper(text) {
		case "OR", "||":
			tok.Type = TokenOR
		case "AND", "&&":
			tok.Type = TokenAND
		case "NOT", "!":
			tok.Type = TokenNOT
		}
	}
	return tok
}