// nostrlib/eventstore/bleve/lexer.go

package bleve

import (
	"strings"
	"unicode"
	"unicode/utf8"
)

// Lexer tokenizes the input string.
type Lexer struct {
	// input is the text being tokenized.
	input string
	// pos is the byte offset into input of the next unread character.
	pos   int

	// peekedQueue holds tokens pushed back by PeekToken and ReturnToken.
	// NextToken pops from the end of the slice, so the most recently pushed
	// token is handed out first.
	peekedQueue []Token
}

// NewLexer returns a Lexer positioned at the start of input.
func NewLexer(input string) *Lexer {
	l := new(Lexer) // pos starts at its zero value, the beginning of input
	l.input = input
	return l
}

// peek returns the rune at the current position without consuming it, or 0
// when the whole input has been consumed.
//
// The input is decoded as UTF-8 instead of being read one byte at a time.
// Converting a single byte to a rune would turn the continuation bytes 0x85
// and 0xA0 into U+0085 and U+00A0, both of which unicode.IsSpace reports as
// whitespace, so multi-byte characters such as U+00E0 (0xC3 0xA0) or U+0445
// (0xD1 0x85) would be split in the middle by the whitespace checks.
// Invalid UTF-8 is reported as utf8.RuneError.
func (l *Lexer) peek() rune {
	if l.pos >= len(l.input) {
		return 0
	}
	r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
	return r
}

// advance consumes the rune at the current position and returns it, or
// returns 0 without moving when the whole input has been consumed.
//
// pos is a byte offset, so it moves forward by the encoded width of the rune.
// An invalid byte decodes as utf8.RuneError with width 1, which guarantees
// forward progress on malformed input.
func (l *Lexer) advance() rune {
	if l.pos >= len(l.input) {
		return 0
	}
	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += size
	return r
}

// skipWhitespace moves the position past any run of whitespace characters
// (as classified by unicode.IsSpace), stopping at the first other character
// or when peek reports 0.
func (l *Lexer) skipWhitespace() {
	for {
		ch := l.peek()
		if ch == 0 || !unicode.IsSpace(ch) {
			return
		}
		l.advance()
	}
}

// readWord consumes and returns the run of characters that starts at the
// current position and ends just before the next whitespace character,
// parenthesis or double quote, or at the end of the input. Any other
// character is accepted as part of the word.
//
// The loop is bounded by the input length rather than by peek's 0 sentinel.
// A literal NUL byte in the input also makes peek return 0; stopping on it
// would yield an empty word without moving pos, so the lexer would never
// make progress past that byte. Here the NUL is consumed as part of the word.
func (l *Lexer) readWord() string {
	start := l.pos

	for l.pos < len(l.input) {
		ch := l.peek()
		if unicode.IsSpace(ch) || ch == '(' || ch == ')' || ch == '"' {
			break
		}
		l.advance()
	}

	return l.input[start:l.pos]
}

// PeekToken returns the next token without consuming it: the token is
// obtained through NextToken and then pushed back, so the following call to
// NextToken returns the same token again.
func (l *Lexer) PeekToken() Token {
	tok := l.NextToken()
	l.ReturnToken(tok)
	return tok
}

// ReturnToken pushes tok back onto the lexer. NextToken hands out pushed-back
// tokens before reading further input, most recently pushed first.
func (l *Lexer) ReturnToken(tok Token) {
	pending := append(l.peekedQueue, tok)
	l.peekedQueue = pending
}

// NextToken consumes and returns the next token.
//
// Tokens pushed back through PeekToken or ReturnToken are returned first, in
// last-in-first-out order. Otherwise leading whitespace is skipped and the
// input is scanned: "(", ")" and "\"" each form their own token, and any
// other run of characters is read as a word. The words OR/||, AND/&& and
// NOT/! (letters compared case-insensitively) become operator tokens whose
// Value keeps the spelling found in the input; every other word is a
// TokenWord. TokenEOF is returned once the input is exhausted.
func (l *Lexer) NextToken() (tok Token) {
	// Pushed-back tokens take priority over fresh input.
	if n := len(l.peekedQueue); n > 0 {
		tok = l.peekedQueue[n-1]
		l.peekedQueue = l.peekedQueue[:n-1]
		return tok
	}

	l.skipWhitespace()
	if l.pos >= len(l.input) {
		return Token{Type: TokenEOF}
	}

	// Single-character punctuation tokens.
	switch l.peek() {
	case '(':
		l.advance()
		return Token{Type: TokenLParen, Value: "("}
	case ')':
		l.advance()
		return Token{Type: TokenRParen, Value: ")"}
	case '"':
		l.advance()
		return Token{Type: TokenQuote, Value: "\""}
	}

	// Everything else is a word; start from a plain word token and upgrade
	// the type when the word is one of the boolean operators.
	word := l.readWord()
	tok = Token{Type: TokenWord, Value: word}
	switch strings.ToUpper(word) {
	case "OR", "||":
		tok.Type = TokenOR
	case "AND", "&&":
		tok.Type = TokenAND
	case "NOT", "!":
		tok.Type = TokenNOT
	}
	return tok
}