Files

210 lines
5.1 KiB
Go

package bleve
import (
"strings"
bleve "github.com/blevesearch/bleve/v2"
bleveQuery "github.com/blevesearch/bleve/v2/search/query"
)
// TokenType identifies the kind of a Token.
type TokenType int
// Token kinds, numbered consecutively from zero via iota.
const (
// TokenWord is a plain term; parse groups consecutive words into one match query.
TokenWord TokenType = iota
// TokenOR is the OR operator; parse combines its operands in a disjunction.
TokenOR
// TokenAND is the AND operator; parse combines its operands in a conjunction.
TokenAND
// TokenNOT is the NOT operator; parse adds the negated operand as a must-not clause.
TokenNOT
// TokenLParen opens a parenthesized group.
TokenLParen
// TokenRParen closes a parenthesized group.
TokenRParen
// TokenQuote delimits a quoted phrase; parse toggles exact-match collection on it.
TokenQuote
// TokenEOF marks the end of input; parse stops reading when it sees it.
TokenEOF
)
// Token is one typed unit of a search expression together with its text.
type Token struct {
// Type is the kind of token.
Type TokenType
// Value is the text carried by the token; parse uses it as search-term text.
Value string
}
// Parser bundles the state used while parsing a search expression.
type Parser struct {
// lexer supplies the token stream that parse consumes.
lexer *Lexer
// field is not read by parse, which works from its own field parameter.
// NOTE(review): confirm whether anything else in the package uses it.
field string
}
// parse turns a search expression into a bleve query restricted to field.
//
// Tokens are read from a Lexer created over input. Consecutive bare words are
// grouped into a single match query (all terms required) and added to the
// current boolean query as a Must clause, or as a MustNot clause when the
// negation flag is set. Text between a pair of quote tokens is additionally
// collected, lowercased and joined with single spaces, as an exact-match
// phrase. An opening parenthesis pushes the query built so far onto a stack
// and starts a fresh one; the matching closing parenthesis joins the two with
// the operator recorded in lastOp when the group was opened (AND by default).
//
// It returns the root query, the quoted phrases in the order their closing
// quote was seen, and an error that is currently always nil. A phrase whose
// closing quote is missing is not reported in the phrase list, although its
// words still take part in the query.
func parse(input string, field string) (bleveQuery.Query, []string, error) {
	p := &Parser{
		lexer: NewLexer(input),
		field: field,
	}

	var (
		exactMatches []string
		phrase       strings.Builder    // text of the quoted phrase being collected
		inPhrase     bool               // true between an opening and a closing quote
		words        []string           // bare words not yet turned into a query
		negated      bool               // the next flushed word group becomes MustNot
		parents      []bleveQuery.Query // queries suspended by an open parenthesis
		parentOps    []TokenType        // operator joining each parent to its group
	)
	lastOp := TokenAND // operator to record for the next opening parenthesis
	curr := bleve.NewBooleanQuery()

	// newMatch builds a match query on field that requires every term of text.
	newMatch := func(text string) bleveQuery.Query {
		m := bleve.NewMatchQuery(text)
		m.SetOperator(bleveQuery.MatchQueryOperatorAnd)
		m.SetField(field)
		return m
	}

	// flushWords turns the pending words into one clause of curr and clears
	// both the word buffer and the negation flag. It does nothing, and leaves
	// the negation flag alone, when no words are pending.
	flushWords := func() {
		if len(words) == 0 {
			return
		}
		m := newMatch(strings.Join(words, " "))
		if negated {
			curr.AddMustNot(m)
		} else {
			curr.AddMust(m)
		}
		words = words[:0]
		negated = false
	}

	// join combines left and right with op. It returns nil for anything other
	// than TokenOR or TokenAND.
	join := func(op TokenType, left, right bleveQuery.Query) bleveQuery.Query {
		switch op {
		case TokenOR:
			or := bleve.NewDisjunctionQuery()
			or.AddQuery(left)
			or.AddQuery(right)
			return or
		case TokenAND:
			and := bleve.NewConjunctionQuery()
			and.AddQuery(left)
			and.AddQuery(right)
			return and
		}
		return nil
	}

	for {
		token := p.lexer.NextToken()
		if token.Type == TokenEOF {
			flushWords()
			break
		}

		if token.Type == TokenQuote {
			if inPhrase {
				// Closing quote: record the phrase and get ready for the next one.
				exactMatches = append(exactMatches, phrase.String())
				phrase.Reset()
			}
			inPhrase = !inPhrase
			continue
		}

		if inPhrase {
			// Inside quotes every token, whatever its type, is treated as text:
			// it extends the phrase and also joins the pending words.
			if phrase.Len() > 0 {
				phrase.WriteByte(' ')
			}
			phrase.WriteString(strings.ToLower(token.Value))
			words = append(words, token.Value)
			continue
		}

		if token.Type == TokenWord {
			words = append(words, token.Value)
			continue
		}

		// Any operator or parenthesis ends the current run of words.
		flushWords()

		switch token.Type {
		case TokenLParen:
			// Suspend the query built so far and start a fresh one for the
			// group; nested groups start again from the default operator.
			parents = append(parents, curr)
			parentOps = append(parentOps, lastOp)
			lastOp = TokenAND
			curr = bleve.NewBooleanQuery()
			continue
		case TokenRParen:
			// Merge the finished group into its parent. An unmatched ")" is
			// ignored.
			if n := len(parents); n > 0 {
				combined := join(parentOps[n-1], parents[n-1], curr)
				curr = bleve.NewBooleanQuery()
				curr.AddMust(combined)
				parents = parents[:n-1]
				parentOps = parentOps[:n-1]
			}
			continue
		}

		// token is an operator: look at what follows it.
		//
		// NOTE(review): next is consumed here and handed back to the lexer
		// only in the default case below. When next is "(" it therefore does
		// not appear to reach the TokenLParen case above, so the recorded
		// lastOp would not be applied to that group. Confirm against the
		// Lexer implementation before changing.
		next := p.lexer.NextToken()
		following := p.lexer.PeekToken()
		if next.Type == TokenNOT {
			negated = true
		}
		opensGroup := next.Type == TokenLParen ||
			(next.Type == TokenNOT && following.Type == TokenLParen)

		switch token.Type {
		case TokenOR, TokenAND:
			if opensGroup {
				// The operand is a group ("(" or "NOT ("): remember the
				// operator so the group can be joined when it closes.
				lastOp = token.Type
			} else {
				// Otherwise the single next token is the whole right operand.
				// NOTE(review): when next is NOT, its own Value is used as the
				// search text here — looks unintended, confirm.
				joined := join(token.Type, curr, newMatch(next.Value))
				curr = bleve.NewBooleanQuery()
				curr.AddMust(joined)
			}
		case TokenNOT:
			if next.Type == TokenLParen {
				negated = true
			} else {
				// The single next token is the negated operand.
				curr.AddMustNot(newMatch(next.Value))
			}
		default:
			p.lexer.ReturnToken(next)
		}
	}

	return curr, exactMatches, nil
}