eventstore/bleve: replace with implementation inspired by pyramid.

This commit is contained in:
fiatjaf
2026-04-10 11:18:40 -03:00
parent a8205a3790
commit 4261bc88f8
13 changed files with 805 additions and 189 deletions
+4 -1
View File
@@ -7,6 +7,7 @@ import (
"fiatjaf.com/nostr"
"fiatjaf.com/nostr/eventstore/lmdb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestBleveFlow(t *testing.T) {
@@ -21,7 +22,9 @@ func TestBleveFlow(t *testing.T) {
Path: "/tmp/blevetest-bleve",
RawEventStore: bb,
}
bl.Init()
err := bl.Init()
require.NoError(t, err, "init")
defer bl.Close()
willDelete := make([]nostr.Event, 0, 3)
-9
View File
@@ -1,9 +0,0 @@
package bleve
import (
"fiatjaf.com/nostr"
)
// DeleteEvent removes the document stored under the hex form of id from the
// search index and returns whatever error the index reports.
func (b *BleveBackend) DeleteEvent(id nostr.ID) error {
	docID := id.Hex()
	return b.index.Delete(docID)
}
-9
View File
@@ -1,9 +0,0 @@
package bleve
// Names of the fields used in indexed documents. They are kept to a single
// character each.
const (
// idField is not referenced by the code visible alongside these constants.
idField = "i"
// contentField holds the event content and is the field searched by text queries.
contentField = "c"
// kindField holds the event kind formatted as a decimal string.
kindField = "k"
// createdAtField holds the event creation timestamp as a float64.
createdAtField = "a"
// pubkeyField holds the hex public key from offset 56 onwards (its tail).
pubkeyField = "p"
)
+104
View File
@@ -0,0 +1,104 @@
package bleve
import (
	"strings"
	"unicode"
	"unicode/utf8"
)
// Lexer splits a search query string into Tokens on demand.
type Lexer struct {
// input is the full query text being tokenized.
input string
// pos is the byte offset into input of the next unread character.
pos int
// peekedQueue holds tokens pushed back by PeekToken and ReturnToken.
// NextToken pops from the end, so the most recently pushed token is
// handed out first.
peekedQueue []Token
}
// NewLexer returns a Lexer positioned at the beginning of input.
func NewLexer(input string) *Lexer {
	lx := new(Lexer)
	lx.input = input
	lx.pos = 0
	return lx
}
// peek returns the rune at the current position without consuming it.
// It returns 0 when the end of the input has been reached.
//
// The input is decoded as UTF-8 rather than byte by byte: casting a single
// byte to a rune turns continuation bytes such as 0x85 or 0xA0 into code
// points that unicode.IsSpace accepts, which would split a word like "voilà"
// in the middle of a character.
func (l *Lexer) peek() rune {
	if l.pos >= len(l.input) {
		return 0
	}
	r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
	return r
}

// advance consumes the rune at the current position and returns it, moving
// pos forward by the encoded width of that rune. It returns 0 and does not
// move when the end of the input has been reached. An invalid UTF-8 byte is
// consumed as a single byte and reported as utf8.RuneError.
func (l *Lexer) advance() rune {
	if l.pos >= len(l.input) {
		return 0
	}
	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += size
	return r
}
// skipWhitespace consumes characters for as long as peek reports a
// whitespace character, stopping at the first non-space character or when
// peek returns 0.
func (l *Lexer) skipWhitespace() {
	for {
		ch := l.peek()
		if ch == 0 || !unicode.IsSpace(ch) {
			return
		}
		l.advance()
	}
}
// readWord consumes and returns the run of characters starting at the
// current position, stopping before the first whitespace character,
// parenthesis or double quote, or when peek returns 0.
func (l *Lexer) readWord() string {
	begin := l.pos
	for {
		ch := l.peek()
		if ch == 0 || unicode.IsSpace(ch) || ch == '(' || ch == ')' || ch == '"' {
			break
		}
		l.advance()
	}
	return l.input[begin:l.pos]
}
// PeekToken returns the token that the next call to NextToken will return.
// It obtains the token through NextToken and immediately pushes it back.
func (l *Lexer) PeekToken() Token {
	upcoming := l.NextToken()
	// push it back so the following NextToken call hands out the same token
	l.peekedQueue = append(l.peekedQueue, upcoming)
	return upcoming
}
// ReturnToken pushes tok back onto the lexer so that it is handed out again
// by the next call to NextToken.
func (l *Lexer) ReturnToken(tok Token) {
	pending := append(l.peekedQueue, tok)
	l.peekedQueue = pending
}
// NextToken returns the next token of the input.
//
// Tokens pushed back through PeekToken or ReturnToken are handed out first,
// most recently pushed first. Otherwise leading whitespace is skipped and one
// of the following is produced:
//   - TokenEOF when the input is exhausted;
//   - TokenLParen, TokenRParen or TokenQuote for "(", ")" and `"`;
//   - TokenOR, TokenAND or TokenNOT for the words OR/||, AND/&&, NOT/!
//     (keywords are matched case-insensitively, Value keeps the original text);
//   - TokenWord for any other run of characters.
//
// Embedded NUL bytes are skipped like whitespace. The character readers use 0
// as their end-of-input marker, so without this a NUL in the middle of the
// input would yield an endless sequence of empty TokenWord tokens without
// ever advancing.
func (l *Lexer) NextToken() (tok Token) {
	if n := len(l.peekedQueue); n > 0 {
		tok = l.peekedQueue[n-1]
		l.peekedQueue = l.peekedQueue[:n-1]
		return tok
	}

	for {
		l.skipWhitespace()
		if l.pos >= len(l.input) {
			return Token{Type: TokenEOF}
		}
		if l.input[l.pos] != 0 {
			break
		}
		// step over the NUL byte and look for whitespace again
		l.pos++
	}

	switch l.peek() {
	case '(':
		l.advance()
		return Token{Type: TokenLParen, Value: "("}
	case ')':
		l.advance()
		return Token{Type: TokenRParen, Value: ")"}
	case '"':
		l.advance()
		return Token{Type: TokenQuote, Value: "\""}
	default:
		word := l.readWord()
		switch strings.ToUpper(word) {
		case "OR", "||":
			return Token{Type: TokenOR, Value: word}
		case "AND", "&&":
			return Token{Type: TokenAND, Value: word}
		case "NOT", "!":
			return Token{Type: TokenNOT, Value: word}
		default:
			return Token{Type: TokenWord, Value: word}
		}
	}
}
+419 -16
View File
@@ -1,34 +1,99 @@
package bleve
import (
"encoding/json"
"errors"
"fmt"
"iter"
"slices"
"strconv"
"strings"
"sync"
"time"
"fiatjaf.com/nostr"
"fiatjaf.com/nostr/eventstore"
"fiatjaf.com/nostr/nip27"
"fiatjaf.com/nostr/nip73"
"fiatjaf.com/nostr/sdk"
bleve "github.com/blevesearch/bleve/v2"
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ar"
_ "github.com/blevesearch/bleve/v2/analysis/lang/cjk"
_ "github.com/blevesearch/bleve/v2/analysis/lang/da"
_ "github.com/blevesearch/bleve/v2/analysis/lang/de"
_ "github.com/blevesearch/bleve/v2/analysis/lang/en"
_ "github.com/blevesearch/bleve/v2/analysis/lang/es"
_ "github.com/blevesearch/bleve/v2/analysis/lang/fa"
_ "github.com/blevesearch/bleve/v2/analysis/lang/fi"
_ "github.com/blevesearch/bleve/v2/analysis/lang/fr"
_ "github.com/blevesearch/bleve/v2/analysis/lang/gl"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hi"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hr"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hu"
_ "github.com/blevesearch/bleve/v2/analysis/lang/in"
_ "github.com/blevesearch/bleve/v2/analysis/lang/it"
_ "github.com/blevesearch/bleve/v2/analysis/lang/nl"
_ "github.com/blevesearch/bleve/v2/analysis/lang/no"
_ "github.com/blevesearch/bleve/v2/analysis/lang/pl"
_ "github.com/blevesearch/bleve/v2/analysis/lang/pt"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ro"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ru"
_ "github.com/blevesearch/bleve/v2/analysis/lang/sv"
_ "github.com/blevesearch/bleve/v2/analysis/lang/tr"
bleveMapping "github.com/blevesearch/bleve/v2/mapping"
bleveQuery "github.com/blevesearch/bleve/v2/search/query"
"github.com/pemistahl/lingua-go"
)
// compile-time check that *BleveBackend implements eventstore.Store
var _ eventstore.Store = (*BleveBackend)(nil)
// Names of the fields of an indexed document.
const (
// labelContentField is the prefix of the per-language content fields; the
// actual field name is this prefix, an underscore and the analyzer code.
labelContentField = "c"
// labelKindField holds the event kind formatted as a decimal string.
labelKindField = "k"
// labelCreatedAtField holds the event creation time.
labelCreatedAtField = "a"
// labelAuthorField holds the hex public key from offset 56 onwards (its tail).
labelAuthorField = "p"
// labelReferencesField holds the references collected from tags and content.
labelReferencesField = "r"
// labelExtrasField holds additional free text taken from external pointers
// found in the content.
labelExtrasField = "x"
)
// SupportedLanguages lists the languages that Init falls back to when
// BleveBackend.Languages is left empty.
var SupportedLanguages = []lingua.Language{
// each of these translates to a specific bleve analyzer
// except for japanese-korean-chinese that all use the same "cjk" analyzer
lingua.Arabic,
lingua.Chinese,
lingua.Croatian,
lingua.Danish,
lingua.Dutch,
lingua.English,
lingua.Finnish,
lingua.French,
lingua.German,
lingua.Hindi,
lingua.Hungarian,
lingua.Italian,
lingua.Japanese,
lingua.Korean,
lingua.Persian,
lingua.Polish,
lingua.Portuguese,
lingua.Romanian,
lingua.Russian,
lingua.Spanish,
lingua.Swedish,
lingua.Turkish,
}
type BleveBackend struct {
sync.Mutex
// Path is where the index will be saved
Path string
Path string
RawEventStore eventstore.Store
ReadOnly bool
IndexableKinds []nostr.Kind
// RawEventStore is where we'll fetch the raw events from
// bleve will only store ids, so the actual events must be somewhere else
RawEventStore eventstore.Store
Languages []lingua.Language
languageCodes []string
index bleve.Index
}
func (b *BleveBackend) Close() {
if b.index != nil {
b.index.Close()
}
index bleve.Index
detector lingua.LanguageDetector
}
func (b *BleveBackend) Init() error {
@@ -38,12 +103,89 @@ func (b *BleveBackend) Init() error {
if b.RawEventStore == nil {
return fmt.Errorf("missing RawEventStore")
}
if len(b.IndexableKinds) == 0 {
b.IndexableKinds = []nostr.Kind{0, 1, 6, 11, 16, 20, 21, 22, 24, 1111, 9802, 30023, 30818}
}
// try to open existing index
index, err := bleve.Open(b.Path)
if len(b.Languages) == 0 {
b.Languages = SupportedLanguages
}
validLanguages := make([]lingua.Language, 0, len(b.Languages))
b.languageCodes = make([]string, 0, len(b.Languages))
for _, lang := range b.Languages {
var code string
switch lang {
case lingua.Chinese, lingua.Korean, lingua.Japanese:
code = "cjk"
default:
code = strings.ToLower(lang.IsoCode639_1().String())
}
if slices.Contains(b.languageCodes, code) {
continue
}
validLanguages = append(validLanguages, lang)
b.languageCodes = append(b.languageCodes, code)
}
b.Languages = validLanguages
index, err := bleve.OpenUsing(b.Path, map[string]any{
"read_only": b.ReadOnly,
})
if err == bleve.ErrorIndexPathDoesNotExist {
// create new index with default mapping
mapping := bleveMapping.NewIndexMapping()
mapping.DefaultMapping.Dynamic = false
doc := bleveMapping.NewDocumentStaticMapping()
for _, code := range b.languageCodes {
contentField := bleveMapping.NewTextFieldMapping()
contentField.Analyzer = code
contentField.Store = false
contentField.IncludeTermVectors = false
contentField.DocValues = false
contentField.IncludeInAll = false
doc.AddFieldMappingsAt(labelContentField+"_"+code, contentField)
}
extrasField := bleveMapping.NewTextFieldMapping()
extrasField.Analyzer = "simple"
extrasField.Store = false
extrasField.IncludeTermVectors = false
extrasField.DocValues = false
extrasField.IncludeInAll = false
doc.AddFieldMappingsAt(labelExtrasField, extrasField)
referencesField := bleveMapping.NewKeywordFieldMapping()
referencesField.DocValues = false
referencesField.Store = false
referencesField.IncludeTermVectors = false
referencesField.IncludeInAll = false
doc.AddFieldMappingsAt(labelReferencesField, referencesField)
authorField := bleveMapping.NewKeywordFieldMapping()
authorField.DocValues = false
authorField.Store = false
authorField.IncludeTermVectors = false
doc.AddFieldMappingsAt(labelAuthorField, authorField)
kindField := bleveMapping.NewKeywordFieldMapping()
kindField.DocValues = false
kindField.Store = false
kindField.IncludeTermVectors = false
kindField.IncludeInAll = false
doc.AddFieldMappingsAt(labelKindField, kindField)
timestampField := bleveMapping.NewDateTimeFieldMapping()
timestampField.DocValues = false
timestampField.Store = false
timestampField.IncludeTermVectors = false
timestampField.IncludeInAll = false
doc.AddFieldMappingsAt(labelCreatedAtField, timestampField)
mapping.AddDocumentMapping("_default", doc)
index, err = bleve.New(b.Path, mapping)
if err != nil {
return fmt.Errorf("error creating index: %w", err)
@@ -53,6 +195,116 @@ func (b *BleveBackend) Init() error {
}
b.index = index
b.detector = lingua.NewLanguageDetectorBuilder().
FromLanguages(b.Languages...).
Build()
return nil
}
// Close closes the underlying index. It is a no-op on a nil backend or on a
// backend whose index was never opened. The error reported by the index is
// discarded.
func (b *BleveBackend) Close() {
	if b == nil || b.index == nil {
		return
	}
	b.index.Close()
}
// SaveEvent indexes event when its kind is one of b.IndexableKinds.
// Events of any other kind are silently ignored and nil is returned.
func (b *BleveBackend) SaveEvent(event nostr.Event) error {
	if !slices.Contains(b.IndexableKinds, event.Kind) {
		return nil
	}
	return b.indexEvent(event)
}
// DeleteEvent removes the document stored under the hex form of id from the
// index. It returns nil without doing anything on a nil backend or when the
// index has not been opened.
func (b *BleveBackend) DeleteEvent(id nostr.ID) error {
	if b == nil || b.index == nil {
		return nil
	}
	return b.index.Delete(id.Hex())
}
// indexEvent builds the search document for evt and stores it in the index
// under the hex id of the event that was passed in.
//
// Some kinds get special treatment before the document is built:
//   - 6 and 16: the content is decoded as an embedded event, which replaces
//     evt for the rest of the function (the document id stays the id of the
//     outer event). Nothing is indexed and nil is returned when the content
//     does not decode or the embedded event fails signature verification.
//   - 0: when the content decodes as profile metadata, name, display name
//     and about become the searchable content and a non-empty NIP-05
//     identifier is added as a reference.
//   - 9802: "comment" tags are appended to the content and "e", "a" and "r"
//     tags are added as references.
//
// The content is then split with nip27.Parse: plain text goes into the
// content field of the detected language, pointers become references, and
// the Thing of external pointers is collected into the extras field.
//
// It returns an error only when writing to the index fails.
func (b *BleveBackend) indexEvent(evt nostr.Event) error {
	docID := evt.ID

	var references []string
	var extras strings.Builder

	switch evt.Kind {
	case 6, 16:
		var innerEvt nostr.Event
		if err := json.Unmarshal([]byte(evt.Content), &innerEvt); err != nil || !innerEvt.VerifySignature() {
			return nil
		}
		evt = innerEvt
	case 0:
		var pm sdk.ProfileMetadata
		if err := json.Unmarshal([]byte(evt.Content), &pm); err == nil {
			evt.Content = pm.Name + "\n" + pm.DisplayName + "\n" + pm.About
			// an empty identifier carries no information, don't index it
			if pm.NIP05 != "" {
				references = append(references, pm.NIP05)
			}
		}
	case 9802:
		for _, tag := range evt.Tags {
			if len(tag) < 2 {
				continue
			}
			switch tag[0] {
			case "comment":
				evt.Content += "\n\n" + tag[1]
			case "e":
				if ptr, err := nostr.EventPointerFromTag(tag); err == nil {
					references = append(references, ptr.AsTagReference())
				}
			case "a":
				if ptr, err := nostr.EntityPointerFromTag(tag); err == nil {
					references = append(references, ptr.AsTagReference())
				}
			case "r":
				references = append(references, tag[1])
			}
		}
	}

	doc := map[string]any{
		labelKindField:      strconv.Itoa(int(evt.Kind)),
		labelAuthorField:    evt.PubKey.Hex()[56:],
		labelCreatedAtField: evt.CreatedAt.Time(),
	}

	content := strings.Builder{}
	content.Grow(len(evt.Content))
	for block := range nip27.Parse(evt.Content) {
		if block.Pointer == nil {
			text := strings.TrimSpace(block.Text)
			if text == "" {
				continue
			}
			// keep the text on either side of a reference apart, otherwise
			// "hello <ref> world" would be indexed as the single word "helloworld"
			if content.Len() > 0 {
				content.WriteByte(' ')
			}
			content.WriteString(text)
			continue
		}

		references = append(references, block.Pointer.AsTagReference())
		if ep, ok := block.Pointer.(nip73.ExternalPointer); ok {
			extras.WriteString(ep.Thing)
			extras.WriteByte(' ')
		}
	}
	indexableContent := content.String()

	lang, ok := b.detector.DetectLanguageOf(indexableContent)
	if !ok {
		lang = lingua.English
	}

	var analyzerLangCode string
	switch lang {
	case lingua.Japanese, lingua.Chinese, lingua.Korean:
		analyzerLangCode = "cjk"
	default:
		analyzerLangCode = strings.ToLower(lang.IsoCode639_1().String())
	}
	// only the configured language codes have a content field in the index
	// mapping; writing to any other field would silently drop the content
	// (this happens with the English fallback when English is not configured)
	if len(b.languageCodes) > 0 && !slices.Contains(b.languageCodes, analyzerLangCode) {
		analyzerLangCode = b.languageCodes[0]
	}

	doc[labelContentField+"_"+analyzerLangCode] = indexableContent
	doc[labelReferencesField] = references
	doc[labelExtrasField] = extras.String()

	if err := b.index.Index(docID.Hex(), doc); err != nil {
		return fmt.Errorf("failed to index '%s' document: %w", docID.Hex(), err)
	}

	return nil
}
@@ -64,3 +316,154 @@ func (b *BleveBackend) CountEvents(filter nostr.Filter) (uint32, error) {
return 0, errors.New("not supported")
}
// QueryEvents runs a full-text search for filter.Search and yields the
// matching events, fetched by id from b.RawEventStore.
//
// Nothing is yielded when the filter's theoretical limit is zero, when the
// trimmed search text is shorter than two bytes, or when the index search
// fails. At most maxLimit hits (or the filter's theoretical limit, if lower)
// are requested from the index.
//
// The search text is split with nip27.Parse: pointers become required term
// queries on the references field (profile and entity pointers also match on
// the author field), the remaining text is parsed as a boolean query and run
// against every per-language content field as well as the extras field.
// filter.Kinds, filter.Authors, filter.Since and filter.Until further restrict
// the index query. Quoted phrases and filter.Tags are checked afterwards
// against the raw events, so fewer events than hits may be yielded.
func (b *BleveBackend) QueryEvents(filter nostr.Filter, maxLimit int) iter.Seq[nostr.Event] {
	return func(yield func(nostr.Event) bool) {
		if tlimit := filter.GetTheoreticalLimit(); tlimit == 0 {
			return
		} else if tlimit < maxLimit {
			maxLimit = tlimit
		}

		filter.Search = strings.TrimSpace(filter.Search)
		if len(filter.Search) < 2 {
			return
		}

		and := make([]bleveQuery.Query, 0, 3)

		searchC := strings.Builder{}
		searchC.Grow(len(filter.Search))
		for block := range nip27.Parse(filter.Search) {
			if block.Pointer == nil {
				text := strings.TrimSpace(block.Text)
				if text == "" {
					continue
				}
				// keep the words on either side of a reference apart instead
				// of gluing them into a single term
				if searchC.Len() > 0 {
					searchC.WriteByte(' ')
				}
				searchC.WriteString(text)
				continue
			}

			genericRef := bleve.NewTermQuery(block.Pointer.AsTagReference())
			genericRef.SetField(labelReferencesField)
			genericRef.SetBoost(2)

			var ref bleveQuery.Query = genericRef

			// pointers that carry a public key also match events written by that key
			var authorKey string
			switch ptr := block.Pointer.(type) {
			case nostr.ProfilePointer:
				authorKey = ptr.PublicKey.Hex()[56:]
			case nostr.EntityPointer:
				authorKey = ptr.PublicKey.Hex()[56:]
			}
			if authorKey != "" {
				authorQuery := bleve.NewTermQuery(authorKey)
				authorQuery.SetField(labelAuthorField)
				authorQuery.SetBoost(2)

				orRef := bleve.NewDisjunctionQuery()
				orRef.AddQuery(genericRef)
				orRef.AddQuery(authorQuery)
				ref = orRef
			}

			and = append(and, ref)
		}
		searchContent := searchC.String()

		var exactMatches []string
		if len(searchContent) > 0 {
			contentQueries := make([]bleveQuery.Query, 0, len(b.Languages)+1)

			searchQ, exactMatches_, err := parse(searchContent, labelContentField+"_"+b.languageCodes[0])
			if err != nil {
				// the text could not be parsed as a boolean query, fall back
				// to a plain match on every language field
				for _, code := range b.languageCodes {
					match := bleve.NewMatchQuery(searchContent)
					match.SetField(labelContentField + "_" + code)
					contentQueries = append(contentQueries, match)
				}
			} else {
				contentQueries = append(contentQueries, searchQ)
				for _, code := range b.languageCodes[1:] {
					langQ, _, _ := parse(searchContent, labelContentField+"_"+code)
					contentQueries = append(contentQueries, langQ)
				}
			}
			exactMatches = exactMatches_

			extrasQ := bleve.NewMatchQuery(searchContent)
			extrasQ.SetField(labelExtrasField)
			contentQueries = append(contentQueries, extrasQ)

			and = append(and, bleveQuery.NewDisjunctionQuery(contentQueries))
		}

		if len(filter.Kinds) > 0 {
			eitherKind := bleve.NewDisjunctionQuery()
			for _, kind := range filter.Kinds {
				kindQ := bleve.NewTermQuery(strconv.Itoa(int(kind)))
				kindQ.SetField(labelKindField)
				eitherKind.AddQuery(kindQ)
			}
			and = append(and, eitherKind)
		}

		if len(filter.Authors) > 0 {
			eitherPubkey := bleve.NewDisjunctionQuery()
			for _, pubkey := range filter.Authors {
				pubkeyQ := bleve.NewTermQuery(pubkey.Hex()[56:])
				pubkeyQ.SetField(labelAuthorField)
				eitherPubkey.AddQuery(pubkeyQ)
			}
			and = append(and, eitherPubkey)
		}

		if filter.Since != 0 || filter.Until != 0 {
			var since time.Time
			if filter.Since != 0 {
				since = filter.Since.Time()
			}
			var until time.Time
			if filter.Until != 0 {
				until = filter.Until.Time()
			} else {
				until = time.Now()
			}
			dateRangeQ := bleve.NewDateRangeQuery(since, until)
			dateRangeQ.SetField(labelCreatedAtField)
			and = append(and, dateRangeQ)
		}

		q := bleveQuery.NewConjunctionQuery(and)

		// score explanations are never read here, so they are not requested
		req := bleve.NewSearchRequest(q)
		req.Size = maxLimit
		req.From = 0

		result, err := b.index.Search(req)
		if err != nil {
			return
		}

	resultHit:
		for _, hit := range result.Hits {
			id, err := nostr.IDFromHex(hit.ID)
			if err != nil {
				continue
			}
			for evt := range b.RawEventStore.QueryEvents(nostr.Filter{IDs: []nostr.ID{id}}, 1) {
				// quoted phrases must appear literally (case-insensitively) in the content
				for _, exactMatch := range exactMatches {
					if !strings.Contains(strings.ToLower(evt.Content), exactMatch) {
						continue resultHit
					}
				}

				// tags are not indexed, check them on the raw event
				for f, v := range filter.Tags {
					if !evt.Tags.ContainsAny(f, v) {
						continue resultHit
					}
				}

				if !yield(evt) {
					return
				}
			}
		}
	}
}
-94
View File
@@ -1,94 +0,0 @@
package bleve
import (
"iter"
"strconv"
"fiatjaf.com/nostr"
bleve "github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
)
// QueryEvents runs a match query for filter.Search on the content field and
// yields the matching events, fetched by id from b.RawEventStore.
//
// Nothing is yielded when the filter's theoretical limit is zero, when the
// search text is shorter than two bytes, or when the index search fails.
// filter.Kinds, filter.Authors, filter.Since and filter.Until are added as
// further required conditions. At most maxLimit hits (or the filter's
// theoretical limit, if lower) are requested from the index.
func (b *BleveBackend) QueryEvents(filter nostr.Filter, maxLimit int) iter.Seq[nostr.Event] {
	return func(yield func(nostr.Event) bool) {
		if tlimit := filter.GetTheoreticalLimit(); tlimit == 0 {
			return
		} else if tlimit < maxLimit {
			maxLimit = tlimit
		}

		if len(filter.Search) < 2 {
			return
		}

		searchQ := bleve.NewMatchQuery(filter.Search)
		searchQ.SetField(contentField)
		conjQueries := []query.Query{searchQ}

		if len(filter.Kinds) > 0 {
			eitherKind := bleve.NewDisjunctionQuery()
			for _, kind := range filter.Kinds {
				kindQ := bleve.NewTermQuery(strconv.Itoa(int(kind)))
				kindQ.SetField(kindField)
				eitherKind.AddQuery(kindQ)
			}
			conjQueries = append(conjQueries, eitherKind)
		}

		if len(filter.Authors) > 0 {
			eitherPubkey := bleve.NewDisjunctionQuery()
			for _, pubkey := range filter.Authors {
				// the key is hex-encoded right here, so its own length is not
				// the length of a hex string; a `len(pubkey) != 64` guard
				// would skip every author and leave the disjunction empty
				pubkeyQ := bleve.NewTermQuery(pubkey.Hex()[56:])
				pubkeyQ.SetField(pubkeyField)
				eitherPubkey.AddQuery(pubkeyQ)
			}
			conjQueries = append(conjQueries, eitherPubkey)
		}

		if filter.Since != 0 || filter.Until != 0 {
			// a nil bound leaves that side of the range open
			var min *float64
			if filter.Since != 0 {
				minVal := float64(filter.Since)
				min = &minVal
			}
			var max *float64
			if filter.Until != 0 {
				maxVal := float64(filter.Until)
				max = &maxVal
			}
			dateRangeQ := bleve.NewNumericRangeInclusiveQuery(min, max, nil, nil)
			dateRangeQ.SetField(createdAtField)
			conjQueries = append(conjQueries, dateRangeQ)
		}

		var q query.Query = searchQ
		if len(conjQueries) > 1 {
			q = bleve.NewConjunctionQuery(conjQueries...)
		}

		req := bleve.NewSearchRequest(q)
		req.Size = maxLimit
		req.From = 0

		result, err := b.index.Search(req)
		if err != nil {
			return
		}

		for _, hit := range result.Hits {
			id, err := nostr.IDFromHex(hit.ID)
			if err != nil {
				continue
			}
			for evt := range b.RawEventStore.QueryEvents(nostr.Filter{IDs: []nostr.ID{id}}, 1) {
				if !yield(evt) {
					return
				}
			}
		}
	}
}
+209
View File
@@ -0,0 +1,209 @@
package bleve
import (
"strings"
bleve "github.com/blevesearch/bleve/v2"
bleveQuery "github.com/blevesearch/bleve/v2/search/query"
)
// TokenType identifies the kind of a Token produced by the Lexer.
type TokenType int
const (
// TokenWord is any run of characters that is not one of the tokens below.
TokenWord TokenType = iota
// TokenOR is the word "OR" (in any letter case) or "||".
TokenOR
// TokenAND is the word "AND" (in any letter case) or "&&".
TokenAND
// TokenNOT is the word "NOT" (in any letter case) or "!".
TokenNOT
// TokenLParen is an opening parenthesis.
TokenLParen
// TokenRParen is a closing parenthesis.
TokenRParen
// TokenQuote is a double quote character.
TokenQuote
// TokenEOF is returned once the input is exhausted.
TokenEOF
)
// Token is a single lexical element of a search query.
type Token struct {
// Type tells what kind of element this is.
Type TokenType
// Value is the text of the element as it appeared in the input;
// it is empty for TokenEOF.
Value string
}
// Parser holds the state used by parse while turning a search string into a query.
type Parser struct {
// lexer supplies the tokens of the search string.
lexer *Lexer
// field is not assigned by parse, which passes its field argument to the
// queries directly.
field string
}
// parse turns a search string into a bleve query against field.
//
// Supported syntax:
//   - consecutive words are combined into one match query in which every
//     word is required;
//   - OR/||, AND/&& and NOT/! combine what came before with the word or
//     parenthesized group that follows;
//   - parentheses group sub-expressions; a group that is still open at the
//     end of the input is closed implicitly;
//   - text between double quotes is collected, lowercased and with single
//     spaces between tokens, into the returned list of exact matches; its
//     words also take part in the query like any other words.
//
// The second return value is the list of quoted phrases, which the caller is
// expected to verify against the actual content. The returned error is
// always nil.
func parse(input string, field string) (bleveQuery.Query, []string, error) {
	p := &Parser{
		lexer: NewLexer(input),
		field: field,
	}

	var (
		exactMatches []string
		exactBuf     strings.Builder // text of the quoted phrase being read
		inQuotes     bool
		currentWords []string
		negated      bool
		parents      []*bleveQuery.BooleanQuery
		parentOps    []TokenType // tracks if parent should be AND or OR
		lastOp       = TokenAND  // last operator seen, used when a group opens
	)

	curr := bleve.NewBooleanQuery()

	// isEmpty tells whether nothing has been added to q yet
	isEmpty := func(q *bleveQuery.BooleanQuery) bool {
		return q.Must == nil && q.Should == nil && q.MustNot == nil
	}

	// flushWords turns the pending words into a single match query on curr
	flushWords := func() {
		if len(currentWords) == 0 {
			return
		}
		match := bleve.NewMatchQuery(strings.Join(currentWords, " "))
		match.SetOperator(bleveQuery.MatchQueryOperatorAnd)
		match.SetField(field)
		if negated {
			curr.AddMustNot(match)
		} else {
			curr.AddMust(match)
		}
		currentWords = currentWords[:0]
		negated = false
	}

	// closeGroup pops the innermost parent and merges it with curr
	closeGroup := func() {
		parent := parents[len(parents)-1]
		op := parentOps[len(parentOps)-1]
		parents = parents[:len(parents)-1]
		parentOps = parentOps[:len(parentOps)-1]

		// an empty boolean query matches nothing, so combining with one
		// would wipe out (AND) or add nothing to (OR) the other side;
		// just keep the side that has content
		if isEmpty(parent) {
			return
		}
		if isEmpty(curr) {
			curr = parent
			return
		}

		var combined bleveQuery.Query
		if op == TokenOR {
			or := bleve.NewDisjunctionQuery()
			or.AddQuery(parent)
			or.AddQuery(curr)
			combined = or
		} else {
			and := bleve.NewConjunctionQuery()
			and.AddQuery(parent)
			and.AddQuery(curr)
			combined = and
		}
		curr = bleve.NewBooleanQuery()
		curr.AddMust(combined)
	}

	// wordQuery builds the query for the single word that follows an operator
	wordQuery := func(word string) *bleveQuery.MatchQuery {
		other := bleve.NewMatchQuery(word)
		other.SetOperator(bleveQuery.MatchQueryOperatorAnd)
		other.SetField(field)
		return other
	}

	for {
		token := p.lexer.NextToken()

		if token.Type == TokenEOF {
			flushWords()
			// close whatever groups were left open so their parents are not lost
			for len(parents) > 0 {
				closeGroup()
			}
			break
		}

		if token.Type == TokenQuote {
			if inQuotes {
				exactMatches = append(exactMatches, exactBuf.String())
				exactBuf.Reset()
			}
			inQuotes = !inQuotes
			continue
		}

		if inQuotes {
			// everything inside quotes is taken literally, operators included
			if exactBuf.Len() > 0 {
				exactBuf.WriteByte(' ')
			}
			exactBuf.WriteString(strings.ToLower(token.Value))
			currentWords = append(currentWords, token.Value)
			continue
		}

		if token.Type == TokenWord {
			currentWords = append(currentWords, token.Value)
			continue
		}

		// any other token ends the current run of words
		flushWords()

		switch token.Type {
		case TokenLParen:
			// push current query to parents stack with the last operator
			parents = append(parents, curr)
			parentOps = append(parentOps, lastOp)
			// reset lastOp to default for inner parentheses
			lastOp = TokenAND
			// start new boolean query for parentheses content
			curr = bleve.NewBooleanQuery()
			continue

		case TokenRParen:
			// a ")" without a matching "(" is ignored
			if len(parents) > 0 {
				closeGroup()
			}
			continue
		}

		// token is an operator: look at what follows it
		next := p.lexer.NextToken()
		following := p.lexer.PeekToken()

		if next.Type == TokenNOT {
			negated = true
		}

		// true when the operand is "(" ... or "NOT (" ...
		groupFollows := next.Type == TokenLParen ||
			(next.Type == TokenNOT && following.Type == TokenLParen)

		switch token.Type {
		case TokenOR:
			if !groupFollows {
				// consider the next word as the only parameter
				or := bleve.NewDisjunctionQuery()
				or.AddQuery(curr)
				or.AddQuery(wordQuery(next.Value))
				curr = bleve.NewBooleanQuery()
				curr.AddMust(or)
			} else {
				lastOp = TokenOR
				if next.Type == TokenLParen {
					// the "(" was consumed as look-ahead; hand it back so
					// the group is actually opened on the next iteration
					p.lexer.ReturnToken(next)
				}
			}
		case TokenAND:
			if !groupFollows {
				// consider the next word as the only parameter
				and := bleve.NewConjunctionQuery()
				and.AddQuery(curr)
				and.AddQuery(wordQuery(next.Value))
				curr = bleve.NewBooleanQuery()
				curr.AddMust(and)
			} else {
				lastOp = TokenAND
				if next.Type == TokenLParen {
					// see the OR case above
					p.lexer.ReturnToken(next)
				}
			}
		case TokenNOT:
			if next.Type != TokenLParen {
				// consider the next word as the only parameter
				curr.AddMustNot(wordQuery(next.Value))
			} else {
				// the "(" stays consumed: the words up to the closing ")"
				// are added to the current query as a single negated match
				negated = true
			}
		default:
			p.lexer.ReturnToken(next)
		}
	}

	return curr, exactMatches, nil
}
+57
View File
@@ -0,0 +1,57 @@
package bleve
import (
"testing"
"github.com/blevesearch/bleve/v2"
"github.com/stretchr/testify/require"
)
// TestParseQuery indexes a handful of phrases in an in-memory index and
// checks, for a series of search strings, both the quoted phrases reported by
// parse and the number of documents matched by the query it builds.
func TestParseQuery(t *testing.T) {
	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultAnalyzer = "en"

	index, err := bleve.NewMemOnly(indexMapping)
	require.NoError(t, err)

	corpus := []map[string]any{
		{"id": "1", "phrase": "I like fruit especially banana and strawberry"},
		{"id": "2", "phrase": "I like fruit like apples and oranges"},
		{"id": "3", "phrase": "I like vegetables but not fruit"},
		{"id": "4", "phrase": "Banana bread is delicious"},
		{"id": "5", "phrase": "Strawberry jam and banana smoothie"},
	}
	for _, entry := range corpus {
		id := entry["id"].(string)
		require.NoError(t, index.Index(id, entry))
	}

	type testCase struct {
		query        string
		expected     int
		exactMatches []string
	}
	cases := []testCase{
		{query: "fruit", expected: 3},
		{query: "banana (NOT delicious)", expected: 2},
		{query: "banana (NOT delicious) bread", expected: 0},
		{query: "smoothie OR apples", expected: 2},
		{query: "smoothie OR apples (NOT fruit)", expected: 1},
		{query: "\"I like\"", expected: 3, exactMatches: []string{"i like"}},
		{query: "banana \"I like fruit\" strawberries", expected: 1, exactMatches: []string{"i like fruit"}},
		{query: "\"I like fruit\" (strawberry OR apple)", expected: 2, exactMatches: []string{"i like fruit"}},
	}

	for _, tc := range cases {
		parsed, phrases, err := parse(tc.query, "phrase")
		require.NoError(t, err)
		require.Equal(t, tc.exactMatches, phrases)

		results, err := index.Search(bleve.NewSearchRequest(parsed))
		require.NoError(t, err)
		require.Equal(t, tc.expected, int(results.Total),
			"query '%s' expected %d results, got %d", tc.query, tc.expected, results.Total)
	}
}
-37
View File
@@ -1,37 +0,0 @@
package bleve
import (
"fmt"
"fiatjaf.com/nostr"
"fiatjaf.com/nostr/eventstore"
)
// ReplaceEvent stores evt in place of earlier events of the same kind and
// author (and the same "d" tag for addressable kinds).
//
// While holding the backend lock it queries for such events, deletes each one
// that is older than evt, and saves evt unless one of them was not older. A
// duplicate-event error from saving is not treated as a failure.
func (b *BleveBackend) ReplaceEvent(evt nostr.Event) error {
	b.Lock()
	defer b.Unlock()

	filter := nostr.Filter{
		Kinds:   []nostr.Kind{evt.Kind},
		Authors: []nostr.PubKey{evt.PubKey},
	}
	if evt.Kind.IsAddressable() {
		filter.Tags = nostr.TagMap{"d": []string{evt.Tags.GetD()}}
	}

	superseded := false
	for previous := range b.QueryEvents(filter, 1) {
		if !nostr.IsOlder(previous, evt) {
			// what is stored is at least as recent, keep it
			superseded = true
			continue
		}
		if err := b.DeleteEvent(previous.ID); err != nil {
			return fmt.Errorf("failed to delete event for replacing: %w", err)
		}
	}
	if superseded {
		return nil
	}

	err := b.SaveEvent(evt)
	if err != nil && err != eventstore.ErrDupEvent {
		return fmt.Errorf("failed to save: %w", err)
	}
	return nil
}
-23
View File
@@ -1,23 +0,0 @@
package bleve
import (
"fmt"
"strconv"
"fiatjaf.com/nostr"
)
// SaveEvent indexes evt under its hex id. The document holds the content,
// the kind as a decimal string, the hex public key from offset 56 onwards
// and the creation timestamp as a float64.
func (b *BleveBackend) SaveEvent(evt nostr.Event) error {
	fields := make(map[string]any, 4)
	fields[contentField] = evt.Content
	fields[kindField] = strconv.Itoa(int(evt.Kind))
	fields[pubkeyField] = evt.PubKey.Hex()[56:]
	fields[createdAtField] = float64(evt.CreatedAt)

	err := b.index.Index(evt.ID.Hex(), fields)
	if err != nil {
		return fmt.Errorf("failed to index '%s' document: %w", evt.ID, err)
	}
	return nil
}
+3
View File
@@ -28,6 +28,8 @@ type BoltBackend struct {
MapSize int64
DB *bbolt.DB
ReadOnly bool
EnableHLLCacheFor func(kind nostr.Kind) (useCache bool, skipSavingActualEvent bool)
}
@@ -36,6 +38,7 @@ func (b *BoltBackend) Init() error {
Timeout: 2 * time.Second,
PreLoadFreelist: true,
FreelistType: bbolt.FreelistMapType,
ReadOnly: b.ReadOnly,
})
if err != nil {
return err
+3
View File
@@ -43,6 +43,7 @@ require (
fiatjaf.com/lib v0.3.6
github.com/dgraph-io/ristretto/v2 v2.3.0
github.com/go-git/go-git/v5 v5.16.3
github.com/pemistahl/lingua-go v1.4.0
github.com/sivukhin/godjot v1.0.6
github.com/templexxx/cpu v0.0.1
github.com/templexxx/xhex v0.0.0-20200614015412-aed53437177b
@@ -64,6 +65,7 @@ require (
github.com/blevesearch/scorch_segment_api/v2 v2.2.16 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/stempel v0.2.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
github.com/blevesearch/vellum v1.0.11 // indirect
github.com/blevesearch/zapx/v11 v11.3.10 // indirect
@@ -94,6 +96,7 @@ require (
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
+6
View File
@@ -42,6 +42,8 @@ github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+j
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/stempel v0.2.0 h1:CYzVPaScODMvgE9o+kf6D4RJ/VRomyi9uHF+PtB+Afc=
github.com/blevesearch/stempel v0.2.0/go.mod h1:wjeTHqQv+nQdbPuJ/YcvOjTInA2EIc6Ks1FoSUzSLvc=
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
github.com/blevesearch/vellum v1.0.11 h1:SJI97toEFTtA9WsDZxkyGTaBWFdWl1n2LEDCXLCq/AU=
@@ -192,6 +194,8 @@ github.com/onsi/gomega v1.4.1/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
github.com/pemistahl/lingua-go v1.4.0 h1:ifYhthrlW7iO4icdubwlduYnmwU37V1sbNrwhKBR4rM=
github.com/pemistahl/lingua-go v1.4.0/go.mod h1:ECuM1Hp/3hvyh7k8aWSqNCPlTxLemFZsRjocUf3KgME=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -209,6 +213,8 @@ github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38 h1:D0vL7YNisV2yqE55+q0lFuGse6U8lxlg7fYTctlT5Gc=
github.com/savsgio/gotils v0.0.0-20240704082632-aef3928b8a38/go.mod h1:sM7Mt7uEoCeFSCBM+qBrqvEo+/9vdmj19wzp3yzUhmg=
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/sivukhin/godjot v1.0.6 h1:yoRD+hlcDbSxP9Gd/KRVlEFXgtGyZyt0CHwhY6Gk3EQ=
github.com/sivukhin/godjot v1.0.6/go.mod h1:wA6KdR4Z+XpwdwyViPDLWYYxT72pKjNc6XGA9I025gM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=