package bleve

import (
	"encoding/json"
	"errors"
	"fmt"
	"iter"
	"slices"
	"strconv"
	"strings"
	"sync"
	"time"

	"fiatjaf.com/nostr"
	"fiatjaf.com/nostr/eventstore"
	"fiatjaf.com/nostr/nip27"
	"fiatjaf.com/nostr/nip73"
	"fiatjaf.com/nostr/sdk"
	bleve "github.com/blevesearch/bleve/v2"
	_ "github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/ar"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/cjk"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/da"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/de"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/en"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/es"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/fa"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/fi"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/fr"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/gl"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/hi"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/hr"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/hu"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/in"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/it"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/nl"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/no"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/pl"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/pt"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/ro"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/ru"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/sv"
	_ "github.com/blevesearch/bleve/v2/analysis/lang/tr"
	bleveMapping "github.com/blevesearch/bleve/v2/mapping"
	bleveQuery "github.com/blevesearch/bleve/v2/search/query"
	"github.com/pemistahl/lingua-go"
)

// Field names used in the bleve documents. The content field is never used
// bare: it is always suffixed with "_" and an analyzer code (e.g. "c_en").
const (
	labelContentField    = "c"
	labelKindField       = "k"
	labelCreatedAtField  = "a"
	labelAuthorField     = "p"
	labelReferencesField = "r"
	labelExtrasField     = "x"
)

// SupportedLanguages lists the languages that can be given to
// BleveBackend.Languages.
var SupportedLanguages = []lingua.Language{
	// each of these translates to a specific bleve analyzer
	// except for japanese-korean-chinese that all use the same "cjk" analyzer
	lingua.Arabic,
	lingua.Chinese,
	lingua.Croatian,
	lingua.Danish,
	lingua.Dutch,
	lingua.English,
	lingua.Finnish,
	lingua.French,
	lingua.German,
	lingua.Hindi,
	lingua.Hungarian,
	lingua.Italian,
	lingua.Japanese,
	lingua.Korean,
	lingua.Persian,
	lingua.Polish,
	lingua.Portuguese,
	lingua.Romanian,
	lingua.Russian,
	lingua.Spanish,
	lingua.Swedish,
	lingua.Turkish,
}

// BleveBackend is a search index over nostr events backed by bleve. It only
// stores what is needed to find event ids; the events themselves are read
// back from RawEventStore when answering queries.
//
// Path, RawEventStore and Languages must be set before calling Init.
type BleveBackend struct {
	sync.Mutex

	// Path is the location of the bleve index on disk.
	Path string
	// RawEventStore is queried by id to materialize search hits.
	RawEventStore eventstore.Store
	// ReadOnly is passed to bleve as the "read_only" open option.
	ReadOnly bool
	// OpenTimeout, when non-zero, is passed to bleve as "bolt_timeout".
	OpenTimeout time.Duration

	// IndexableKinds restricts which kinds SaveEvent indexes. Init fills in
	// a default list when it is empty.
	IndexableKinds []nostr.Kind
	// Languages are the languages content may be detected as. Init reduces
	// this list to one language per distinct analyzer.
	Languages []lingua.Language

	// languageCodes holds the analyzer code of each entry in Languages,
	// in the same order.
	languageCodes []string

	index    bleve.Index
	detector lingua.LanguageDetector
}

// analyzerCode returns the name of the bleve analyzer used for lang:
// "cjk" for Chinese, Japanese and Korean, and the lowercase ISO 639-1 code
// for everything else.
func analyzerCode(lang lingua.Language) string {
	switch lang {
	case lingua.Chinese, lingua.Japanese, lingua.Korean:
		return "cjk"
	default:
		return strings.ToLower(lang.IsoCode639_1().String())
	}
}

// appendTextBlock writes the trimmed text to sb, separated from whatever is
// already there by a single space so that words from adjacent blocks are not
// glued into a single token.
func appendTextBlock(sb *strings.Builder, text string) {
	text = strings.TrimSpace(text)
	if text == "" {
		return
	}
	if sb.Len() > 0 {
		sb.WriteByte(' ')
	}
	sb.WriteString(text)
}

// newFieldTermQuery builds a term query for term restricted to field.
func newFieldTermQuery(field, term string) *bleveQuery.TermQuery {
	q := bleve.NewTermQuery(term)
	q.SetField(field)
	return q
}

// Init validates the configuration, opens the index at Path (creating it
// with a fresh mapping if it does not exist yet) and prepares the language
// detector.
//
// It returns an error when Path, RawEventStore or Languages are missing, or
// when the index cannot be opened or created.
func (b *BleveBackend) Init() error {
	if b.Path == "" {
		return fmt.Errorf("missing Path")
	}
	if b.RawEventStore == nil {
		return fmt.Errorf("missing RawEventStore")
	}
	if len(b.Languages) == 0 {
		return fmt.Errorf("missing Languages")
	}
	if len(b.IndexableKinds) == 0 {
		b.IndexableKinds = []nostr.Kind{0, 1, 6, 11, 16, 20, 21, 22, 24, 1111, 9802, 30023, 30818}
	}

	// keep only the first language of each analyzer, so Languages and
	// languageCodes stay aligned and free of duplicates.
	// NOTE(review): this also removes e.g. Japanese from the detector when
	// Chinese was listed first — confirm that is intended.
	validLanguages := make([]lingua.Language, 0, len(b.Languages))
	b.languageCodes = make([]string, 0, len(b.Languages))
	for _, lang := range b.Languages {
		code := analyzerCode(lang)
		if slices.Contains(b.languageCodes, code) {
			continue
		}
		validLanguages = append(validLanguages, lang)
		b.languageCodes = append(b.languageCodes, code)
	}
	b.Languages = validLanguages

	opts := map[string]any{
		"read_only": b.ReadOnly,
	}
	if b.OpenTimeout != 0 {
		opts["bolt_timeout"] = b.OpenTimeout.String()
	}

	index, err := bleve.OpenUsing(b.Path, opts)
	if errors.Is(err, bleve.ErrorIndexPathDoesNotExist) {
		index, err = bleve.New(b.Path, b.newIndexMapping())
		if err != nil {
			return fmt.Errorf("error creating index: %w", err)
		}
	} else if err != nil {
		return fmt.Errorf("error opening index: %w", err)
	}
	b.index = index

	// lingua's builder needs at least two languages to choose from; with a
	// single analyzer there is nothing to detect, so indexEvent just uses it.
	b.detector = nil
	if len(b.Languages) >= 2 {
		b.detector = lingua.NewLanguageDetectorBuilder().
			FromLanguages(b.Languages...).
			Build()
	}

	return nil
}

// newIndexMapping builds the static mapping used for a brand new index: one
// analyzed content field per configured analyzer, an "extras" text field and
// keyword/datetime fields for references, author, kind and creation time.
// Nothing is stored; the index is only used to find document ids.
func (b *BleveBackend) newIndexMapping() bleveMapping.IndexMapping {
	mapping := bleveMapping.NewIndexMapping()
	mapping.DefaultMapping.Dynamic = false

	doc := bleveMapping.NewDocumentStaticMapping()

	for _, code := range b.languageCodes {
		contentField := bleveMapping.NewTextFieldMapping()
		contentField.Analyzer = code
		contentField.Store = false
		contentField.IncludeTermVectors = false
		contentField.DocValues = false
		contentField.IncludeInAll = false
		doc.AddFieldMappingsAt(labelContentField+"_"+code, contentField)
	}

	extrasField := bleveMapping.NewTextFieldMapping()
	extrasField.Analyzer = "simple"
	extrasField.Store = false
	extrasField.IncludeTermVectors = false
	extrasField.DocValues = false
	extrasField.IncludeInAll = false
	doc.AddFieldMappingsAt(labelExtrasField, extrasField)

	referencesField := bleveMapping.NewKeywordFieldMapping()
	referencesField.DocValues = false
	referencesField.Store = false
	referencesField.IncludeTermVectors = false
	referencesField.IncludeInAll = false
	doc.AddFieldMappingsAt(labelReferencesField, referencesField)

	// NOTE(review): unlike the other fields, IncludeInAll is left at its
	// default here — confirm whether that is deliberate.
	authorField := bleveMapping.NewKeywordFieldMapping()
	authorField.DocValues = false
	authorField.Store = false
	authorField.IncludeTermVectors = false
	doc.AddFieldMappingsAt(labelAuthorField, authorField)

	kindField := bleveMapping.NewKeywordFieldMapping()
	kindField.DocValues = false
	kindField.Store = false
	kindField.IncludeTermVectors = false
	kindField.IncludeInAll = false
	doc.AddFieldMappingsAt(labelKindField, kindField)

	timestampField := bleveMapping.NewDateTimeFieldMapping()
	timestampField.DocValues = false
	timestampField.Store = false
	timestampField.IncludeTermVectors = false
	timestampField.IncludeInAll = false
	doc.AddFieldMappingsAt(labelCreatedAtField, timestampField)

	mapping.AddDocumentMapping("_default", doc)
	return mapping
}

// Close closes the underlying index. It is safe to call on a nil backend or
// before Init.
func (b *BleveBackend) Close() {
	if b != nil && b.index != nil {
		b.index.Close()
	}
}

// SaveEvent indexes event if its kind is listed in IndexableKinds and is a
// no-op otherwise.
func (b *BleveBackend) SaveEvent(event nostr.Event) error {
	if slices.Contains(b.IndexableKinds, event.Kind) {
		return b.indexEvent(event)
	}
	return nil
}

// DeleteEvent removes the document with the given id from the index. It is a
// no-op on a nil backend or before Init.
func (b *BleveBackend) DeleteEvent(id nostr.ID) error {
	if b != nil && b.index != nil {
		return b.index.Delete(id.Hex())
	}
	return nil
}

// indexEvent extracts the searchable text and references from evt and writes
// them to the index under the id of evt.
//
// Reposts (kinds 6 and 16) are indexed with the data of the event embedded
// in their content, and silently skipped when that event cannot be decoded
// or verified. Profiles (kind 0) are indexed by name, display name and
// about, with their NIP-05 address as a reference.
func (b *BleveBackend) indexEvent(evt nostr.Event) error {
	// the document id is always that of the outer event, even for reposts
	docID := evt.ID.Hex()

	var references []string

	switch evt.Kind {
	case 6, 16:
		var innerEvt nostr.Event
		if err := json.Unmarshal([]byte(evt.Content), &innerEvt); err != nil || !innerEvt.VerifySignature() {
			return nil
		}
		evt = innerEvt
	case 0:
		var pm sdk.ProfileMetadata
		if err := json.Unmarshal([]byte(evt.Content), &pm); err == nil {
			evt.Content = pm.Name + "\n" + pm.DisplayName + "\n" + pm.About
			if pm.NIP05 != "" {
				references = append(references, pm.NIP05)
			}
		}
	}

	for _, tag := range evt.Tags {
		if len(tag) < 2 {
			continue
		}

		switch tag[0] {
		case "comment", "name", "title", "about", "description":
			// descriptive tags are searchable as if they were content
			evt.Content += "\n\n" + tag[1]
		case "e":
			if ptr, err := nostr.EventPointerFromTag(tag); err == nil {
				references = append(references, ptr.AsTagReference())
			}
		case "a":
			if ptr, err := nostr.EntityPointerFromTag(tag); err == nil {
				references = append(references, ptr.AsTagReference())
			}
		case "r":
			references = append(references, tag[1])
		}
	}

	// split the content into plain text (analyzed) and references (exact)
	var content, extras strings.Builder
	content.Grow(len(evt.Content))
	for block := range nip27.Parse(evt.Content) {
		if block.Pointer == nil {
			appendTextBlock(&content, block.Text)
			continue
		}

		references = append(references, block.Pointer.AsTagReference())
		if ep, ok := block.Pointer.(nip73.ExternalPointer); ok {
			extras.WriteString(ep.Thing)
			extras.WriteByte(' ')
		}
	}
	indexableContent := content.String()

	doc := map[string]any{
		labelKindField:       strconv.Itoa(int(evt.Kind)),
		labelAuthorField:     evt.PubKey.Hex()[56:],
		labelCreatedAtField:  evt.CreatedAt.Time(),
		labelReferencesField: references,
		labelExtrasField:     extras.String(),
	}
	doc[labelContentField+"_"+b.contentAnalyzerCode(indexableContent)] = indexableContent

	if err := b.index.Index(docID, doc); err != nil {
		return fmt.Errorf("failed to index '%s' document: %w", docID, err)
	}

	return nil
}

// contentAnalyzerCode picks the analyzer code of the content field that text
// should be indexed under. When the language cannot be detected it prefers
// English, but only if English is configured: content written to a field
// that is not part of the mapping would not be indexed at all.
func (b *BleveBackend) contentAnalyzerCode(text string) string {
	if b.detector != nil {
		if lang, ok := b.detector.DetectLanguageOf(text); ok {
			return analyzerCode(lang)
		}
	}

	if fallback := analyzerCode(lingua.English); len(b.languageCodes) == 0 || slices.Contains(b.languageCodes, fallback) {
		return fallback
	}
	return b.languageCodes[0]
}

// CountEvents returns the number of indexed documents. Only the empty filter
// is supported; any other filter yields an error.
func (b *BleveBackend) CountEvents(filter nostr.Filter) (uint32, error) {
	if filter.String() != "{}" {
		return 0, errors.New("not supported")
	}

	count, err := b.index.DocCount()
	if err != nil {
		return 0, err
	}
	return uint32(count), nil
}

// QueryEvents runs a search for filter.Search, further restricted by the
// kinds, authors and since/until of the filter, and yields the matching
// events as read from RawEventStore.
//
// At most maxLimit hits (or the theoretical limit of the filter, if lower)
// are requested from the index. Exact-match phrases and tag filters are
// applied afterwards to the loaded events, so fewer events may be yielded.
// Nothing is yielded when the search string is shorter than two characters
// or when the index search fails.
func (b *BleveBackend) QueryEvents(filter nostr.Filter, maxLimit int) iter.Seq[nostr.Event] {
	return func(yield func(nostr.Event) bool) {
		if tlimit := filter.GetTheoreticalLimit(); tlimit == 0 {
			return
		} else if tlimit < maxLimit {
			maxLimit = tlimit
		}

		filter.Search = strings.TrimSpace(filter.Search)
		if len(filter.Search) < 2 {
			return
		}

		and := make([]bleveQuery.Query, 0, 3)

		// references in the search string become exact queries, the rest is
		// collected as free text
		var searchC strings.Builder
		searchC.Grow(len(filter.Search))
		for block := range nip27.Parse(filter.Search) {
			if block.Pointer == nil {
				appendTextBlock(&searchC, block.Text)
				continue
			}

			genericRef := newFieldTermQuery(labelReferencesField, block.Pointer.AsTagReference())
			genericRef.SetBoost(2)

			// profiles and addresses also match events authored by their key
			var authorSuffix string
			switch ptr := block.Pointer.(type) {
			case nostr.ProfilePointer:
				authorSuffix = ptr.PublicKey.Hex()[56:]
			case nostr.EntityPointer:
				authorSuffix = ptr.PublicKey.Hex()[56:]
			}
			if authorSuffix == "" {
				and = append(and, genericRef)
				continue
			}

			authorQuery := newFieldTermQuery(labelAuthorField, authorSuffix)
			authorQuery.SetBoost(2)

			orRef := bleve.NewDisjunctionQuery()
			orRef.AddQuery(genericRef)
			orRef.AddQuery(authorQuery)
			and = append(and, orRef)
		}
		searchContent := searchC.String()

		var exactMatches []string
		if len(searchContent) > 0 {
			contentQueries := make([]bleveQuery.Query, 0, len(b.languageCodes)+1)

			matchQuery := func(field string) bleveQuery.Query {
				match := bleve.NewMatchQuery(searchContent)
				match.SetField(field)
				return match
			}

			firstQ, exact, err := parse(searchContent, labelContentField+"_"+b.languageCodes[0])
			if err != nil {
				// not valid query syntax: search it as plain text everywhere
				for _, code := range b.languageCodes {
					contentQueries = append(contentQueries, matchQuery(labelContentField+"_"+code))
				}
			} else {
				contentQueries = append(contentQueries, firstQ)
				for _, code := range b.languageCodes[1:] {
					field := labelContentField + "_" + code
					q, _, err := parse(searchContent, field)
					if err != nil {
						// never add a possibly nil query to the disjunction
						contentQueries = append(contentQueries, matchQuery(field))
						continue
					}
					contentQueries = append(contentQueries, q)
				}
			}
			exactMatches = exact

			contentQueries = append(contentQueries, matchQuery(labelExtrasField))
			and = append(and, bleveQuery.NewDisjunctionQuery(contentQueries))
		}

		if len(filter.Kinds) > 0 {
			eitherKind := bleve.NewDisjunctionQuery()
			for _, kind := range filter.Kinds {
				eitherKind.AddQuery(newFieldTermQuery(labelKindField, strconv.Itoa(int(kind))))
			}
			and = append(and, eitherKind)
		}

		if len(filter.Authors) > 0 {
			eitherPubkey := bleve.NewDisjunctionQuery()
			for _, pubkey := range filter.Authors {
				eitherPubkey.AddQuery(newFieldTermQuery(labelAuthorField, pubkey.Hex()[56:]))
			}
			and = append(and, eitherPubkey)
		}

		if filter.Since != 0 || filter.Until != 0 {
			var since time.Time // zero value when there is no lower bound
			if filter.Since != 0 {
				since = filter.Since.Time()
			}
			until := time.Now()
			if filter.Until != 0 {
				until = filter.Until.Time()
			}
			dateRangeQ := bleve.NewDateRangeQuery(since, until)
			dateRangeQ.SetField(labelCreatedAtField)
			and = append(and, dateRangeQ)
		}

		req := bleve.NewSearchRequest(bleveQuery.NewConjunctionQuery(and))
		req.Size = maxLimit
		req.From = 0

		result, err := b.index.Search(req)
		if err != nil {
			// the iterator has no way of reporting errors
			return
		}

		// matches applies the checks the index cannot answer by itself
		matches := func(evt nostr.Event) bool {
			if len(exactMatches) > 0 {
				lowered := strings.ToLower(evt.Content)
				for _, exactMatch := range exactMatches {
					if !strings.Contains(lowered, exactMatch) {
						return false
					}
				}
			}
			for f, v := range filter.Tags {
				if !evt.Tags.ContainsAny(f, v) {
					return false
				}
			}
			return true
		}

		for _, hit := range result.Hits {
			id, err := nostr.IDFromHex(hit.ID)
			if err != nil {
				continue
			}
			for evt := range b.RawEventStore.QueryEvents(nostr.Filter{IDs: []nostr.ID{id}}, 1) {
				if !matches(evt) {
					// move on to the next hit
					break
				}
				if !yield(evt) {
					return
				}
			}
		}
	}
}