105 lines
1.9 KiB
Go
105 lines
1.9 KiB
Go
package bleve
|
|
|
|
import (
	"strings"
	"unicode"
	"unicode/utf8"
)
|
|
|
|
// lexer tokenizes the input string
|
|
type Lexer struct {
|
|
input string
|
|
pos int
|
|
|
|
peekedQueue []Token
|
|
}
|
|
|
|
func NewLexer(input string) *Lexer {
|
|
return &Lexer{input: input, pos: 0}
|
|
}
|
|
|
|
func (l *Lexer) peek() rune {
|
|
if l.pos >= len(l.input) {
|
|
return 0
|
|
}
|
|
return rune(l.input[l.pos])
|
|
}
|
|
|
|
func (l *Lexer) advance() rune {
|
|
if l.pos >= len(l.input) {
|
|
return 0
|
|
}
|
|
ch := rune(l.input[l.pos])
|
|
l.pos++
|
|
return ch
|
|
}
|
|
|
|
func (l *Lexer) skipWhitespace() {
|
|
for l.peek() != 0 && unicode.IsSpace(l.peek()) {
|
|
l.advance()
|
|
}
|
|
}
|
|
|
|
func (l *Lexer) readWord() string {
|
|
start := l.pos
|
|
|
|
// read regular word (alphanumeric, hyphens, underscores)
|
|
for l.peek() != 0 && !unicode.IsSpace(l.peek()) &&
|
|
l.peek() != '(' && l.peek() != ')' && l.peek() != '"' {
|
|
l.advance()
|
|
}
|
|
|
|
return l.input[start:l.pos]
|
|
}
|
|
|
|
func (l *Lexer) PeekToken() Token {
|
|
next := l.NextToken()
|
|
l.peekedQueue = append(l.peekedQueue, next)
|
|
return next
|
|
}
|
|
|
|
func (l *Lexer) ReturnToken(tok Token) {
|
|
l.peekedQueue = append(l.peekedQueue, tok)
|
|
}
|
|
|
|
func (l *Lexer) NextToken() (tok Token) {
|
|
if len(l.peekedQueue) > 0 {
|
|
next := l.peekedQueue[len(l.peekedQueue)-1]
|
|
l.peekedQueue = l.peekedQueue[0 : len(l.peekedQueue)-1]
|
|
return next
|
|
}
|
|
|
|
l.skipWhitespace()
|
|
|
|
if l.pos >= len(l.input) {
|
|
return Token{Type: TokenEOF}
|
|
}
|
|
|
|
ch := l.peek()
|
|
|
|
switch ch {
|
|
case '(':
|
|
l.advance()
|
|
return Token{Type: TokenLParen, Value: "("}
|
|
case ')':
|
|
l.advance()
|
|
return Token{Type: TokenRParen, Value: ")"}
|
|
case '"':
|
|
l.advance()
|
|
return Token{Type: TokenQuote, Value: "\""}
|
|
default:
|
|
word := l.readWord()
|
|
upperWord := strings.ToUpper(word)
|
|
|
|
switch upperWord {
|
|
case "OR", "||":
|
|
return Token{Type: TokenOR, Value: word}
|
|
case "AND", "&&":
|
|
return Token{Type: TokenAND, Value: word}
|
|
case "NOT", "!":
|
|
return Token{Type: TokenNOT, Value: word}
|
|
default:
|
|
return Token{Type: TokenWord, Value: word}
|
|
}
|
|
}
|
|
}
|