Add search chapter
This commit is contained in:
+54
-9
@@ -83,6 +83,10 @@ pub struct Filter {
|
||||
/// Maximum number of events a consumer should return. This is not a
|
||||
/// matching criterion — [`matches`](Filter::matches) ignores it.
|
||||
pub limit: Option<usize>,
|
||||
/// NIP-50 full-text search query. The string is opaque at this point:
|
||||
/// its structure and local relevance scoring are the subject of the next
|
||||
/// chapter. `None` means no search constraint.
|
||||
pub search: Option<String>,
|
||||
}
|
||||
```
|
||||
|
||||
@@ -91,6 +95,10 @@ the struct. But it is a result-count constraint for consumers (relays,
|
||||
storage engines), not a predicate over individual events. The `matches`
|
||||
method ignores it entirely.
|
||||
|
||||
The `search` field comes from NIP-50. At this point it is just a string
|
||||
carried alongside the other fields; the [Search](12-search.md) chapter
|
||||
gives it meaning — a query model and a local relevance score.
|
||||
|
||||
## Matching
|
||||
|
||||
Matching walks each present field and returns `false` as soon as one
|
||||
@@ -190,6 +198,7 @@ impl Filter {
|
||||
since: None,
|
||||
until: None,
|
||||
limit: None,
|
||||
search: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -446,6 +455,9 @@ impl Serialize for Filter {
|
||||
if self.limit.is_some() {
|
||||
count += 1;
|
||||
}
|
||||
if self.search.is_some() {
|
||||
count += 1;
|
||||
}
|
||||
|
||||
let mut map = serializer.serialize_map(Some(count))?;
|
||||
|
||||
@@ -477,6 +489,9 @@ impl Serialize for Filter {
|
||||
if let Some(limit) = self.limit {
|
||||
map.serialize_entry("limit", &limit)?;
|
||||
}
|
||||
if let Some(search) = &self.search {
|
||||
map.serialize_entry("search", search)?;
|
||||
}
|
||||
|
||||
map.end()
|
||||
}
|
||||
@@ -511,6 +526,7 @@ impl<'de> Visitor<'de> for FilterVisitor {
|
||||
let mut since: Option<u64> = None;
|
||||
let mut until: Option<u64> = None;
|
||||
let mut limit: Option<usize> = None;
|
||||
let mut search: Option<String> = None;
|
||||
|
||||
while let Some(key) = map.next_key::<String>()? {
|
||||
match key.as_str() {
|
||||
@@ -544,6 +560,7 @@ impl<'de> Visitor<'de> for FilterVisitor {
|
||||
"since" => since = Some(map.next_value()?),
|
||||
"until" => until = Some(map.next_value()?),
|
||||
"limit" => limit = Some(map.next_value()?),
|
||||
"search" => search = Some(map.next_value()?),
|
||||
other if other.starts_with('#') => {
|
||||
let tag_name = other[1..].to_string();
|
||||
let values: Vec<String> = map.next_value()?;
|
||||
@@ -563,6 +580,7 @@ impl<'de> Visitor<'de> for FilterVisitor {
|
||||
since,
|
||||
until,
|
||||
limit,
|
||||
search,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -634,6 +652,7 @@ impl Filter {
|
||||
/// (structural shape)
|
||||
/// - The exact `since` and `until` values (different time windows
|
||||
/// cannot be combined)
|
||||
/// - The exact `search` query (different searches cannot be combined)
|
||||
///
|
||||
/// A filter with a `limit` always gets a unique group key, because
|
||||
/// merging limited filters would change result-count semantics.
|
||||
@@ -649,6 +668,7 @@ impl Filter {
|
||||
|
||||
self.since.hash(&mut hasher);
|
||||
self.until.hash(&mut hasher);
|
||||
self.search.hash(&mut hasher);
|
||||
|
||||
if self.limit.is_some() {
|
||||
// Each limited filter gets a unique group — merging two
|
||||
@@ -669,7 +689,9 @@ and a filter on `#p` tags land in different groups — as they should,
|
||||
since merging them by union would change the semantics. Likewise, two
|
||||
filters with different `since` or `until` values land in different
|
||||
groups, because a union of their sets under one time window would either
|
||||
over-fetch or under-fetch relative to what was requested.
|
||||
over-fetch or under-fetch relative to what was requested. The `search`
|
||||
query is treated the same way: two filters with different searches can
|
||||
never be merged, so each distinct search forms its own group.
|
||||
|
||||
## Union and intersection
|
||||
|
||||
@@ -741,8 +763,9 @@ the earliest `until`. Finally it passes the result through
|
||||
///
|
||||
/// Set fields are unioned. Time windows are tightened: the latest
|
||||
/// `since` and earliest `until` win. If both filters have a `limit`,
|
||||
/// the larger one is kept. The result is simplified with
|
||||
/// [`union_filters`].
|
||||
/// the larger one is kept. Two filters carrying *different* searches
|
||||
/// cannot be combined into one, so the pair is kept separate instead.
|
||||
/// The result is simplified with [`union_filters`].
|
||||
pub fn intersect_filters(groups: &[Vec<Filter>]) -> Vec<Filter> {
|
||||
let Some(first) = groups.first() else {
|
||||
return vec![];
|
||||
@@ -755,7 +778,15 @@ pub fn intersect_filters(groups: &[Vec<Filter>]) -> Vec<Filter> {
|
||||
|
||||
for f1 in &result {
|
||||
for f2 in filters {
|
||||
combined.push(combine_pair(f1, f2));
|
||||
match combine_pair(f1, f2) {
|
||||
Some(f) => combined.push(f),
|
||||
// Two different searches can't be combined into one
|
||||
// filter; keep both so neither query is lost.
|
||||
None => {
|
||||
combined.push(f1.clone());
|
||||
combined.push(f2.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -765,7 +796,16 @@ pub fn intersect_filters(groups: &[Vec<Filter>]) -> Vec<Filter> {
|
||||
union_filters(&result)
|
||||
}
|
||||
|
||||
fn combine_pair(a: &Filter, b: &Filter) -> Filter {
|
||||
fn combine_pair(a: &Filter, b: &Filter) -> Option<Filter> {
|
||||
// Two different searches cannot be expressed as a single search, so
|
||||
// there is no filter that satisfies both. Returning `None` tells the
|
||||
// caller to keep the pair separate rather than fabricate one.
|
||||
if let (Some(s1), Some(s2)) = (&a.search, &b.search) {
|
||||
if s1 != s2 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let mut f = Filter::new();
|
||||
|
||||
f.ids = union_option_sets(&a.ids, &b.ids);
|
||||
@@ -794,7 +834,10 @@ fn combine_pair(a: &Filter, b: &Filter) -> Filter {
|
||||
(l, None) | (None, l) => l,
|
||||
};
|
||||
|
||||
f
|
||||
// At most one search is present here (equal searches collapse to one).
|
||||
f.search = a.search.clone().or_else(|| b.search.clone());
|
||||
|
||||
Some(f)
|
||||
}
|
||||
|
||||
fn union_option_sets<T: Ord + Clone>(
|
||||
@@ -815,6 +858,8 @@ fn union_option_sets<T: Ord + Clone>(
|
||||
|
||||
## What's next
|
||||
|
||||
The next chapter extends filters with NIP-50 full-text search — an
|
||||
optional `search` field that some relays support for content-based
|
||||
queries.
|
||||
The `search` field rides along through serialization, grouping, and the
|
||||
set algebra here, but it has no meaning yet — `matches` doesn't look at
|
||||
it, and the string is uninterpreted. The next chapter takes up NIP-50
|
||||
full-text search: a query model that splits the string into terms (NIP-50
|
||||
extensions are kept as ordinary terms) and a local relevance score.
|
||||
|
||||
@@ -0,0 +1,214 @@
|
||||
# Search
|
||||
|
||||
NIP-50 adds one field to the filter from the previous chapter: a `search`
|
||||
string. A relay that advertises the capability reads the string as a
|
||||
human-readable query — `best nostr apps` — matches it against event content,
|
||||
and returns results ordered by relevance rather than by `created_at`, with
|
||||
`limit` applied after ranking.
|
||||
|
||||
Search is opt-in and implementation-defined. Relays decide whether they index events
|
||||
at all, what matches, and how ranking works. The query may also carry
|
||||
`key:value` extensions — `domain:`, `language:`, `sentiment:`, `nsfw:`,
|
||||
`include:spam` — and a relay honors only the ones it understands, ignoring the
|
||||
rest. There is no global index and no guarantee of completeness: a client
|
||||
queries the relays it believes support search and accepts a partial view.
|
||||
|
||||
Search may be implemented relay-side, or it may be performed on a client in some
|
||||
situations. This chapter provides utilities for parsing search terms along with
|
||||
a very basic model for implementing search that is decoupled from filter matching
|
||||
itself and entirely opt-in.
|
||||
|
||||
## The module
|
||||
|
||||
```rust {file=coracle-lib/src/lib.rs}
|
||||
pub mod search;
|
||||
```
|
||||
|
||||
```rust {file=coracle-lib/src/search.rs}
|
||||
//! NIP-50 full-text search queries.
|
||||
//!
|
||||
//! A [`SearchQuery`] holds the terms of a search string and computes a
|
||||
//! best-effort relevance score against event content — for the case where
|
||||
//! search runs on the client, over events already in hand, rather than on a
|
||||
//! relay.
|
||||
|
||||
use std::fmt;
|
||||
```
|
||||
|
||||
## The query model
|
||||
|
||||
A `SearchQuery` is just the query's terms: the words split out of the search
|
||||
string. NIP-50 also defines `key:value` extensions, but their meaning is
|
||||
relay-defined, and the local scorer has no way to evaluate `sentiment:negative`
|
||||
or `domain:example.com` without data it doesn't have. Rather than model
|
||||
extensions we can't honor, we treat every token as a term. A relay that
|
||||
understands an extension still sees it verbatim in the query string; the local
|
||||
scorer simply matches it as text like any other word.
|
||||
|
||||
```rust {file=coracle-lib/src/search.rs}
|
||||
/// The typed form of a NIP-50 search string: an ordered list of terms.
///
/// `key:value` extensions get no dedicated representation. Their meaning is
/// decided by each relay and cannot be checked locally, so an extension token
/// is stored as a plain term like any other word.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SearchQuery {
    /// Terms of the query, kept in their original order.
    pub terms: Vec<String>,
}
|
||||
```
|
||||
|
||||
### Parsing
|
||||
|
||||
Parsing splits the query on whitespace. Every token becomes a term, including
|
||||
anything that looks like an extension. There is nothing to reject, so parsing is
|
||||
total — it never errors.
|
||||
|
||||
```rust {file=coracle-lib/src/search.rs}
|
||||
impl SearchQuery {
|
||||
/// Create an empty query.
|
||||
pub fn new() -> Self {
|
||||
SearchQuery::default()
|
||||
}
|
||||
|
||||
/// Parse a raw query string by splitting it on whitespace. Every token,
|
||||
/// extension-like or not, becomes a term. Parsing never fails.
|
||||
pub fn parse(input: &str) -> Self {
|
||||
SearchQuery {
|
||||
terms: input.split_whitespace().map(str::to_string).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// True when the query has no terms.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.terms.is_empty()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Rendering joins the terms back into a query string. It is the inverse of
|
||||
parsing: feeding the output of one into the other gives an equal query, modulo
|
||||
runs of whitespace collapsing to single spaces.
|
||||
|
||||
```rust {file=coracle-lib/src/search.rs}
|
||||
impl fmt::Display for SearchQuery {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(&self.terms.join(" "))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Scoring
|
||||
|
||||
NIP-50 returns results in descending order of relevance, so a boolean "matches
|
||||
or not" is the wrong shape for a local implementation. The scorer instead
|
||||
returns a number in `0.0..=1.0`, which can drive both inclusion (anything above
|
||||
zero is a hit) and ordering.
|
||||
|
||||
The score has two parts. The base is the fraction of the query's terms that
|
||||
appear in the content, compared case-insensitively — three terms, two present,
|
||||
gives `2/3`. On top of that, repeated occurrences add a small, diminishing
|
||||
bonus, so that among events matching the same set of terms the ones that mention
|
||||
them more often rank higher. The bonus is strictly less than `1/total`, which means
|
||||
it can reorder events *within* a fraction but can never push a partial match up
|
||||
to a full one: a missing term always costs more than any number of repetitions
|
||||
can recover. A full match is already capped at `1.0`, so repetitions only separate partial matches. An empty query — no terms — scores `1.0`, since there is no text to
|
||||
constrain.
|
||||
|
||||
```rust {file=coracle-lib/src/search.rs}
|
||||
impl SearchQuery {
|
||||
/// Score `content` against this query's terms, in `0.0..=1.0`.
|
||||
///
|
||||
/// The base score is the fraction of the query's terms found in the content
|
||||
/// (case-insensitive substring). Repeated occurrences add a diminishing
|
||||
/// bonus, strictly less than one term's worth, so a partial match never
|
||||
/// reaches `1.0`. An empty query scores `1.0`: there is no text to match.
|
||||
pub fn score(&self, content: &str) -> f64 {
|
||||
let total = self.terms.len();
|
||||
if total == 0 {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
let haystack = content.to_lowercase();
|
||||
|
||||
let mut matched = 0usize;
|
||||
let mut extra = 0usize;
|
||||
for term in &self.terms {
|
||||
let needle = term.to_lowercase();
|
||||
if needle.is_empty() {
|
||||
// An empty term imposes no constraint; treat it as present.
|
||||
matched += 1;
|
||||
continue;
|
||||
}
|
||||
let count = haystack.matches(needle.as_str()).count();
|
||||
if count > 0 {
|
||||
matched += 1;
|
||||
extra += count - 1;
|
||||
}
|
||||
}
|
||||
|
||||
let base = matched as f64 / total as f64;
|
||||
let bonus = (1.0 - 1.0 / (1.0 + extra as f64)) / total as f64;
|
||||
(base + bonus).min(1.0)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Lowercasing uses `to_lowercase`, which folds case across Unicode rather than
|
||||
only ASCII. That allocates, but nostr content is multilingual, and correctness
|
||||
on non-Latin text is worth more than avoiding a copy in a best-effort matcher.
|
||||
|
||||
## Connecting queries to filters
|
||||
|
||||
The previous chapter gave `Filter` a `search` field but no builder methods for it. The
|
||||
setters follow the established `add_*` / `clear_*` vocabulary.
|
||||
|
||||
```rust {file=coracle-lib/src/filters.rs}
|
||||
use crate::search::SearchQuery;
|
||||
|
||||
impl Filter {
|
||||
/// Set the NIP-50 search query.
|
||||
pub fn add_search(mut self, search: impl Into<String>) -> Self {
|
||||
self.search = Some(search.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Remove the search query, leaving no search constraint.
|
||||
pub fn clear_search(mut self) -> Self {
|
||||
self.search = None;
|
||||
self
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Scoring an event against a filter is then a matter of parsing the field and
|
||||
delegating to `SearchQuery::score`. With no search set the method returns `1.0`,
|
||||
so an unsearched filter never penalizes an event. This is purely the search
|
||||
dimension — it is independent of the structural `matches` check from the
|
||||
previous chapter, and the two are meant to be composed by the caller, not folded
|
||||
together. A consumer that wants search-ranked results filters with `matches`,
|
||||
scores with `search_score`, and sorts as it sees fit.
|
||||
|
||||
```rust {file=coracle-lib/src/filters.rs}
|
||||
impl Filter {
|
||||
/// Best-effort local relevance score for `event`, in `0.0..=1.0`.
|
||||
///
|
||||
/// Parses the `search` field and scores it against the event's content,
|
||||
/// returning `1.0` when there is no search. This considers *only* the
|
||||
/// `search` field; it is independent of [`matches`](Filter::matches).
|
||||
pub fn search_score(&self, event: &Event) -> f64 {
|
||||
match &self.search {
|
||||
Some(query) => SearchQuery::parse(query).score(&event.content),
|
||||
None => 1.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## What's next
|
||||
|
||||
Search depends on routing the query to a relay that actually supports it.
|
||||
Discovering which relays advertise NIP-50, and choosing among them, is a
|
||||
networking and relay-metadata concern — the subject of the Domain and Networking
|
||||
sections, where relay selection is built on top of the filter types assembled
|
||||
here.
|
||||
@@ -0,0 +1,189 @@
|
||||
# Plan: Search
|
||||
|
||||
## Topic Summary
|
||||
|
||||
NIP-50 adds an optional full-text `search` field to the subscription filter from
|
||||
chapter 11. A relay that supports the capability interprets the query against
|
||||
event content (and, for some kinds, other fields), returning results ordered by
|
||||
relevance rather than `created_at`, with `limit` applied after ranking. The
|
||||
query may carry `key:value` extensions — `domain:`, `language:`, `sentiment:`,
|
||||
`nsfw:`, `include:spam` — which relays may support or ignore.
|
||||
|
||||
This chapter extends `Filter` with a `search` field, threads it through
|
||||
serialization / grouping / set algebra, introduces a typed `SearchQuery` that
|
||||
splits free-text terms from `key:value` extensions, and implements a best-effort
|
||||
local relevance **score in [0, 1]** used to both include and rank events —
|
||||
mirroring the NIP's "descending order by quality of result, limit last."
|
||||
|
||||
## Chapter Outline
|
||||
|
||||
1. **Intro / framing** — Search as a relay-defined, optional capability; content
|
||||
discovery is client-initiated routing, not a global index; results are
|
||||
partial and ranked by the relay. The local matcher is an honest best-effort
|
||||
fallback, not a reimplementation of relay search.
|
||||
2. **The `search` field** — Add `search: Option<String>` to `Filter`; builder
|
||||
methods `add_search` / `clear_search`; note it joins the derived `Hash` (so
|
||||
`id()` covers it for free).
|
||||
3. **Serialization** — Emit/parse a plain `"search"` key in the hand-written
|
||||
serde impl, present only when `Some`.
|
||||
4. **The `SearchQuery` model** — A new `search` module: terms + ordered
|
||||
`key:value` extensions, `parse`, `Display`, builders, and the `Filter` bridge.
|
||||
5. **Scoring & matching** — `search_score` (fraction-of-terms + diminishing
|
||||
frequency bonus, capped at 1.0); `matches` includes an event when score > 0;
|
||||
`rank_search_results` sorts by score then `created_at` and applies `limit`.
|
||||
6. **Grouping and set algebra** — `search` enters `group()` (distinct searches
|
||||
never merge); `union_filters` carries it through unchanged; `intersect_filters`
|
||||
keeps a conflicting-search pair separate instead of fabricating a combined query.
|
||||
7. **What's next** — Brief pointer to the Domain section (relay selection,
|
||||
discovering NIP-50-capable relays via relay metadata, is a later concern).
|
||||
|
||||
## API Design
|
||||
|
||||
### `coracle-lib/src/filters.rs` (extends existing `Filter`)
|
||||
|
||||
```rust
|
||||
pub struct Filter {
|
||||
// ... existing fields ...
|
||||
/// NIP-50 full-text search query. Relay-interpreted; see `SearchQuery`.
|
||||
pub search: Option<String>,
|
||||
}
|
||||
|
||||
impl Filter {
|
||||
pub fn add_search(self, search: impl Into<String>) -> Self; // sets Some
|
||||
pub fn clear_search(self) -> Self; // sets None
|
||||
|
||||
/// Bridge to the typed model.
|
||||
pub fn add_search_query(self, query: &SearchQuery) -> Self; // = add_search(query.to_string())
|
||||
pub fn search_query(&self) -> Option<SearchQuery>; // parse the field back
|
||||
|
||||
/// Best-effort local relevance score in [0.0, 1.0].
|
||||
/// Returns 1.0 when there is no search, or a search with no free-text
|
||||
/// terms (only extensions, which are unenforceable locally).
|
||||
pub fn search_score(&self, event: &Event) -> f64;
|
||||
}
|
||||
|
||||
/// Filter `events` to those matching `filter`, sort by relevance
|
||||
/// (search_score desc, then created_at desc), and apply `filter.limit`.
|
||||
pub fn rank_search_results<'a>(filter: &Filter, events: &'a [Event]) -> Vec<&'a Event>;
|
||||
```
|
||||
|
||||
`matches` gains a final check: `if self.search_score(event) == 0.0 { return false }`.
|
||||
Because `search_score` returns 1.0 when there is no search (or no terms), this
|
||||
only rejects when a search *with terms* matched none of them — i.e. "any term
|
||||
present ⇒ included."
|
||||
|
||||
### `coracle-lib/src/search.rs` (new module)
|
||||
|
||||
```rust
|
||||
/// A parsed NIP-50 search query: free-text terms plus `key:value` extensions.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default)]
|
||||
pub struct SearchQuery {
|
||||
pub terms: Vec<String>,
|
||||
pub extensions: Vec<(String, String)>, // ordered; repeats allowed
|
||||
}
|
||||
|
||||
impl SearchQuery {
|
||||
pub fn new() -> Self;
|
||||
/// Total parse: split on whitespace; a token is an extension iff it is
|
||||
/// `key:value` with key in [A-Za-z0-9_-]+, non-empty value not starting
|
||||
/// with '/'. Everything else is a term. Never fails.
|
||||
pub fn parse(input: &str) -> Self;
|
||||
pub fn add_term(self, term: impl Into<String>) -> Self;
|
||||
pub fn add_extension(self, key: impl Into<String>, value: impl Into<String>) -> Self;
|
||||
pub fn is_empty(&self) -> bool;
|
||||
}
|
||||
|
||||
impl fmt::Display for SearchQuery { /* terms first, then "key:value" exts, space-joined */ }
|
||||
```
|
||||
|
||||
`Filter::matches` / `search_score` tokenize via `SearchQuery::parse`, using only
|
||||
`terms` (extensions are ignored by the local matcher).
|
||||
|
||||
### Scoring formula (`search_score`)
|
||||
|
||||
For the parsed query's distinct `terms` (case-insensitive), against
|
||||
`event.content` lowercased:
|
||||
|
||||
- `total` = number of distinct terms; if 0 → return 1.0.
|
||||
- For each term, `count` = non-overlapping occurrences in content.
|
||||
- `matched` = terms with `count ≥ 1`; `extra` = (Σ count) − matched (repeats
|
||||
beyond the first hit of each matched term).
|
||||
- `base = matched / total` (fraction of terms present, in [0, 1]).
|
||||
- `bonus = (1 − 1/(1 + extra)) / total` (diminishing, strictly `< 1/total`, so a
|
||||
partial match never reaches the next term's bucket).
|
||||
- `score = (base + bonus).min(1.0)`.
|
||||
|
||||
Properties (asserted in tests): in [0, 1]; all terms once ⇒ 1.0; missing a term
|
||||
⇒ `< 1.0`; more occurrences ⇒ ≥ score (monotonic, never exceeds 1.0); no terms
|
||||
matched ⇒ exactly 0.0.
|
||||
|
||||
## Code Organization
|
||||
|
||||
- **`coracle-lib/src/filters.rs`** — add the `search` field, builders, the
|
||||
serde changes, `search_score`, the `matches` check, `rank_search_results`,
|
||||
and the `group()` / `intersect_filters` updates. `use crate::search::SearchQuery;`.
|
||||
- **`coracle-lib/src/search.rs`** — the `SearchQuery` type. New `pub mod search;`
|
||||
in `lib.rs`, placed before `filters` (filters depends on it).
|
||||
- **`coracle-lib/src/prelude.rs`** — add `pub use crate::search::SearchQuery;`
|
||||
(the prelude already re-exports commonly used items).
|
||||
- **`coracle-lib/tests/search.rs`** — hand-written integration tests (not tangled).
|
||||
|
||||
## Dependencies
|
||||
|
||||
None new. Parsing and matching use `std` only. No FTS engine — out of scope and
|
||||
against the minimal-dependency rule.
|
||||
|
||||
## Narrative Notes
|
||||
|
||||
- Open with the philosophy: search is opt-in and relay-defined; no global index;
|
||||
results partial and relay-ranked. Frame the local scorer as a fallback for
|
||||
in-memory/offline querying, and warn (per rust-nostr's SDK) that re-filtering a
|
||||
relay's returned results client-side can wrongly drop legitimate hits — relays
|
||||
rank with richer, extension-aware logic.
|
||||
- Explain *why* extensions are parsed but **ignored locally**: `sentiment:`,
|
||||
`domain:`, etc. require data the client doesn't have, so honoring them locally
|
||||
is impossible; we keep them in the typed model for *building/inspecting*
|
||||
queries, not for local evaluation.
|
||||
- Justify the score model concretely: NIP-50 mandates relevance ordering, so a
|
||||
boolean match is the wrong shape — a [0,1] score lets us both include
|
||||
(score > 0) and rank. Walk through the fraction + diminishing-bonus formula
|
||||
with a small worked example.
|
||||
- For grouping: reuse the chapter-11 reasoning — two filters with different
|
||||
searches can't be unioned without changing semantics, so `search` joins the
|
||||
group key. Show that `union_filters` then keeps them separate automatically.
|
||||
- For `intersect_filters`: explain the one structural change — `combine_pair`
|
||||
returns `Option<Filter>`; a pair whose two searches differ returns `None`, and
|
||||
the caller emits both filters separately rather than concatenating queries.
|
||||
|
||||
## Design Decisions
|
||||
|
||||
1. **Typed `SearchQuery`, lean/generic.** Terms + a generic ordered list of
|
||||
`key:value` extensions, with `add_term`/`add_extension`. No per-extension
|
||||
helpers or typed enums — keeps the surface small and forward-compatible with
|
||||
relay-specific extensions. (Every reference treats search as opaque; the typed
|
||||
model is our value-add.)
|
||||
2. **Local relevance score in [0, 1]**, fraction-of-terms + diminishing frequency
|
||||
bonus, capped at 1.0. Chosen over a boolean to model NIP-50's relevance
|
||||
ordering. Extensions excluded from scoring.
|
||||
3. **`matches` includes on score > 0** ("any term present"); ranking via
|
||||
`rank_search_results` handles relevance + `limit`-after-sort.
|
||||
4. **`search` participates in `group()`**, so `union_filters` never merges
|
||||
distinct searches.
|
||||
5. **`intersect_filters` keeps a conflicting-search pair separate** (combine
|
||||
returns `Option`, `None` ⇒ emit both) rather than concatenating, per the
|
||||
user's choice.
|
||||
6. **Builder naming `add_search`/`clear_search`** to match the existing
|
||||
`add_since`/`clear_since` vocabulary (not rust-nostr's `search`/`remove_search`).
|
||||
7. **Unicode-aware lowercasing** (`to_lowercase`) for the local matcher rather
|
||||
than ASCII-only, given multilingual nostr content; note the allocation
|
||||
trade-off. Substring counting via `str::matches`.
|
||||
8. **Extension parse heuristic** documented: a colon-bearing token like a URL may
|
||||
be read as an extension; applications needing exact control build
|
||||
`SearchQuery` field-by-field instead of parsing.
|
||||
|
||||
## Open Questions
|
||||
|
||||
- Exact wording of the frequency-bonus explanation — keep the formula in prose
|
||||
light; lean on a worked example. (Resolved during writing.)
|
||||
- Whether `rank_search_results` belongs as a free function (consistent with
|
||||
`matches_any`/`union_filters`) — yes, free function.
|
||||
@@ -0,0 +1,307 @@
|
||||
# Research: Search
|
||||
|
||||
## Topic Summary
|
||||
|
||||
NIP-50 adds an optional full-text `search` field to the subscription filter
|
||||
introduced in chapter 11. A relay that supports the capability interprets the
|
||||
query string against event content (and, for some kinds, other fields),
|
||||
returning results ordered by relevance rather than `created_at`. The query may
|
||||
carry structured extensions in the form of `key:value` pairs — `domain:`,
|
||||
`language:`, `sentiment:`, `nsfw:`, `include:spam` — which relays may support or
|
||||
ignore.
|
||||
|
||||
The chapter will:
|
||||
|
||||
1. Add a `search` field to the existing `Filter` type, wiring it through
|
||||
construction, serialization, hashing, grouping, and the union/intersect
|
||||
utilities.
|
||||
2. Introduce a typed `SearchQuery` model that splits free-text terms from
|
||||
`key:value` extensions, so applications can build and inspect queries safely
|
||||
instead of stringly-typed concatenation. (This is a deliberate departure
|
||||
from every reference, which treats the query as an opaque string.)
|
||||
3. Implement a best-effort, case-insensitive local matcher over event content,
|
||||
while documenting that real ranking and extension semantics are
|
||||
relay-defined.
|
||||
|
||||
The code lives in `coracle-lib`: the `search` field extends `filters.rs`, and
|
||||
the query model gets a dedicated `search.rs` module.
|
||||
|
||||
## Philosophy
|
||||
|
||||
From `ref/building-nostr`, the framing relevant to search is that **content
|
||||
discovery on nostr is client-initiated routing through relay selection**, not a
|
||||
query against a global index. Searching is "knowing where to send queries." A
|
||||
relay that supports NIP-50 is exercising an *optional, relay-authored
|
||||
capability* — like content curation or access control — and defines its own
|
||||
matching semantics, including which extensions it honors. This mirrors the NIP's
|
||||
own "relays SHOULD ignore extensions they don't support."
|
||||
|
||||
Three principles bear directly on the chapter's voice:
|
||||
|
||||
- **No guaranteed completeness.** "No implementation will have a complete view
|
||||
of every heuristic that is applicable" — so search results are neither global
|
||||
nor exhaustive. A client queries the relays it knows support search and
|
||||
accepts a partial, spontaneous view. This should be stated honestly, not hidden.
|
||||
- **Indexing is the curator's responsibility, not the user's.** Authors publish
|
||||
signed events; relays (or indexing services) that *want* content discoverable
|
||||
maintain the index. Clients do nothing special beyond sending a `search`
|
||||
filter to a search-capable relay.
|
||||
- **Publicity, not privacy.** Full-text indexing makes content patterns
|
||||
discoverable and gives relay operators visibility into queries. The honest
|
||||
framing: search is a publicity feature.
|
||||
|
||||
The takeaway for our library: model `search` as a first-class but optional
|
||||
filter field, keep the query structured enough that applications can reason
|
||||
about it, and be candid that local matching is a best-effort approximation of a
|
||||
relay-defined operation.
|
||||
|
||||
## Reference Implementation Analysis
|
||||
|
||||
### applesauce
|
||||
|
||||
`search` is an optional string on an extended `Filter` type
|
||||
(`packages/core/src/helpers/filter.ts`): `Filter = CoreFilter & { search?: string }`,
|
||||
extending nostr-tools' base type. **Opaque** — no extension parsing.
|
||||
|
||||
Dual-mode: relay subscriptions pass the string through verbatim; a local SQLite
|
||||
backend (`packages/sqlite`) indexes content into an FTS5 table and runs
|
||||
`events_search MATCH ?` with the raw string double-quote-escaped. Local
|
||||
client-side `matchFilter()` **ignores** the search field entirely. Pluggable
|
||||
"search content formatters" decide what gets indexed (default: `content`;
|
||||
enhanced: kind-0 profile fields plus `t`/`subject`/`title`/`summary`/`d` tags).
|
||||
Supports `order: "created_at" | "rank"` for FTS5 ranking. Low coupling; SQLite
|
||||
is optional. No query-extension awareness anywhere.
|
||||
|
||||
### ndk
|
||||
|
||||
`search?: string` on `NDKFilter` (`core/src/subscription/index.ts:30`).
|
||||
**Opaque, relay-only.** No parsing, no validation (filter-validation pipeline
|
||||
skips it), no client-side matching (delegates to nostr-tools' `matchFilters`,
|
||||
which ignores search). No helper functions for building search filters; callers
|
||||
construct `{ search: "..." }` by hand. The field is serialized and sent to
|
||||
relays as-is. No NIP-11 capability negotiation or fallback. Minimal by design.
|
||||
|
||||
### nostr-gadgets
|
||||
|
||||
Re-uses `@nostr/tools`' `Filter` type (`search?: string`). **Opaque,
|
||||
relay-only.** Notably its local stores *reject* search: the in-memory store
|
||||
returns an empty set if `filter.search` is present, and the RedEventStore docs
|
||||
state "any filters supported (except 'search')." Provides a hardcoded
|
||||
`SEARCH_RELAYS` constant (`defaults.ts`): `relay.nostr.band`, `nostr.wine`,
|
||||
`relay.noswhere.com`, `relay.nos.today`. No query builders, no dynamic relay
|
||||
capability detection.
|
||||
|
||||
### nostrlib (Go)
|
||||
|
||||
`Search string` on the `Filter` struct (`filter.go`), (de)serialized as a plain
|
||||
`"search"` JSON key. The core `Filter.Matches` / `MatchesIgnoringTimestampConstraints`
|
||||
**ignores** search — matching is delegated to eventstore backends. Key-value
|
||||
backends (BoltDB, LMDB, MMM) return nothing for search queries; only the **Bleve**
|
||||
backend implements real full-text search: per-document language auto-detection
|
||||
(lingua-go, 22 languages), per-language analyzers, boolean query syntax
|
||||
(`AND/OR/NOT`, parens, quoted phrases), NIP-27 reference extraction with 2× boost,
|
||||
and case-insensitive substring validation of quoted phrases. Kind-0 profiles index
|
||||
name/display_name/about; reposts unpack inner events. Khatru relay policies
|
||||
`NoSearchQueries`/`RemoveSearchQueries` let operators disable search. SDK
|
||||
`SearchUsers()` just sends a `Search` filter to designated user-search relays. No
|
||||
NIP-50 *extension* parsing (treats `domain:x` as a regular word); a 2-char minimum
|
||||
query length is enforced by Bleve.
|
||||
|
||||
### nostr-tools
|
||||
|
||||
`search?: string` on the base `Filter` (`filter.ts`). **The canonical
|
||||
"defined-but-unused" implementation.** `matchFilter()`/`matchFilters()` do not
|
||||
check search at all; `mergeFilters()` drops it entirely. No parsing, no
|
||||
validation, no helpers, no tests for the field. Strictly a transport-layer
|
||||
placeholder so applications can send search filters to relays. Minimal-deps
|
||||
philosophy: search is purely a relay concern.
|
||||
|
||||
### rust-nostr
|
||||
|
||||
The most directly relevant reference (also Rust). In
|
||||
`crates/nostr/src/filter.rs`:
|
||||
|
||||
```rust
|
||||
/// A string describing a query in a human-readable form, i.e. "best nostr apps"
|
||||
/// <https://github.com/nostr-protocol/nips/blob/master/50.md>
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub search: Option<String>,
|
||||
```
|
||||
|
||||
Builder API: `search<S: Into<String>>(self, value: S) -> Self` and
|
||||
`remove_search(self) -> Self` — symmetric, generic, `#[inline]`. **Opaque** (no
|
||||
extension parsing).
|
||||
|
||||
Local matching (`search_match`):
|
||||
|
||||
```rust
|
||||
fn search_match(&self, event: &Event) -> bool {
|
||||
match &self.search {
|
||||
Some(query) => event.content.as_bytes()
|
||||
.windows(query.len())
|
||||
.any(|window| window.eq_ignore_ascii_case(query.as_bytes())),
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Case-insensitive **ASCII** substring via sliding window; `None` matches
|
||||
everything. As quoted, an empty query would call `windows(0)`, which panics, so an empty search needs a guard. Gated by a `MatchEventOptions { nip50: bool, .. }` flag (default
|
||||
true). Notably, the SDK relay sets `.nip50(false)` with the comment "Skip NIP-50
|
||||
matches since they may create issues and ban non-malicious relays" — i.e.
|
||||
client-side re-matching of a relay's search results can wrongly drop valid hits.
|
||||
DB backends (LMDB, SQLite) extend matching to a fixed set of searchable tags —
|
||||
`title`, `description`, `subject`, `name` — lowercasing the query once up front;
|
||||
empty search → no results. A `Features { full_text_search: bool }` flag declares
|
||||
backend capability.
|
||||
|
||||
Patterns worth emulating: `Into<String>` builder, `skip_serializing_if` for a
|
||||
clean wire format, an explicit opt-out for search matching, ASCII case folding
|
||||
for speed.
|
||||
|
||||
### welshman
|
||||
|
||||
The TypeScript toolkit our library descends from. `search?: string` on `Filter`
|
||||
(`packages/util/src/Filters.ts`). It is the **only reference that matches search
|
||||
locally and threads it through filter utilities**:
|
||||
|
||||
```typescript
|
||||
export const matchFilter = (filter, event) => {
|
||||
if (!nostrToolsMatchFilter(filter, event)) return false
|
||||
if (filter.search) {
|
||||
const content = event.content.toLowerCase()
|
||||
const terms = filter.search.toLowerCase().split(/\s+/g)
|
||||
for (const term of terms) {
|
||||
if (content.includes(term)) return true
|
||||
return false // <-- bug: returns after first term
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
```
|
||||
|
||||
The intent is term-splitting + case-insensitive substring, but the early
|
||||
`return false` means only the first term is ever checked. **A correct version
|
||||
should decide AND vs OR across terms explicitly** — this is the one place we can
|
||||
clearly improve on the reference.
|
||||
|
||||
Filter utilities (directly parallel to our `group`/`union_filters`/`intersect_filters`):
|
||||
|
||||
- `calculateFilterGroup` pushes `search:${search}` into the group key — **a
|
||||
filter with a search is only mergeable with an identical search.**
|
||||
- `unionFilters` treats `search` (like `since`/`until`/`limit`) as a scalar
|
||||
preserved from the first filter in the group, **not merged**.
|
||||
- `intersectFilters` concatenates differing searches with a space
|
||||
(`[a, b].join(" ")`) — modeling "must match both" as a compound query — and
|
||||
takes whichever is present otherwise.
|
||||
- `getFilterId` includes search in the deterministic hash, so different searches
|
||||
never dedupe.
|
||||
|
||||
Search-relay selection lives in the router: `getSearchRelays()` returns relays
|
||||
whose NIP-11 `supported_nips` includes `"50"`. No extension parsing.
|
||||
|
||||
## Common Patterns
|
||||
|
||||
- **`search` is universally an optional plain string.** Every reference models
|
||||
it as `Option<String>` / `search?: string`. None parse the `key:value`
|
||||
extensions — they treat the whole query as opaque and let the relay interpret
|
||||
it. Our typed `SearchQuery` is therefore a value-add, not a port.
|
||||
- **Local matching is the exception, not the rule.** nostr-tools, ndk,
|
||||
applesauce (in `matchFilter`), and nostrlib's core `Filter` all *ignore*
|
||||
search locally; matching happens relay-side (or in a dedicated index like
|
||||
Bleve/FTS5). Only rust-nostr and welshman attempt local matching, both with
|
||||
case-insensitive substring over `content`.
|
||||
- **Where matching exists, it's case-insensitive substring** — rust-nostr does
|
||||
ASCII-only `eq_ignore_ascii_case` over byte windows (whole query as one
|
||||
needle); welshman lowercases and splits on whitespace into terms (intending
|
||||
multi-term, buggily). DB backends additionally search a small fixed set of
|
||||
metadata tags (`title`, `description`, `subject`, `name`).
|
||||
- **Search makes filters un-mergeable.** Both welshman (group key) and the
|
||||
general intuition agree: two filters with different search strings can't be
|
||||
unioned without changing semantics. rust-nostr sidesteps merging at this layer
|
||||
entirely.
|
||||
- **Client-side re-matching is risky.** rust-nostr's SDK disables NIP-50
|
||||
matching when filtering relay results, because a relay's notion of a match
|
||||
(ranked, fuzzy, multi-field, extension-aware) is richer than a client's
|
||||
substring check — re-filtering can drop legitimate hits.
|
||||
- **Relay selection by NIP-11.** Search-capable relays are discovered via
|
||||
`supported_nips` containing `50` (welshman) or a hardcoded allowlist
|
||||
(nostr-gadgets). This is an application/networking concern, out of scope for
|
||||
`coracle-lib`.
|
||||
|
||||
## Considerations for Our Implementation
|
||||
|
||||
**Filter field.** Add `pub search: Option<String>` to `Filter`. Follow
|
||||
rust-nostr: `add_search<S: Into<String>>(self, S)` and `clear_search(self)` to
|
||||
match the existing `add_*`/`clear_*` builder vocabulary (our methods are named
|
||||
`add_since`/`clear_since`, etc., so `add_search`/`clear_search` fits better than
|
||||
rust-nostr's `search`/`remove_search`). Once added, the field participates in the
|
||||
derived `Hash` (so `id()` covers it for free), but serialization, `group()`,
|
||||
`union_filters`, `intersect_filters`, and `matches()` all need explicit updates.
|
||||
|
||||
**Serialization.** Our `Filter` has hand-written serde (to flatten `#tag` keys).
|
||||
Add `search` as a plain `"search"` key — emit only when `Some` (mirroring
|
||||
`since`/`until`/`limit`), and read it in the visitor's match arm. A round-trip
|
||||
test must cover it.
|
||||
|
||||
**Grouping / union / intersect.** Per welshman: include `search` in the
|
||||
`group()` hash so filters with different searches land in different groups (never
|
||||
merged). In `union_filters`, since group members share an identical search by
|
||||
construction, the search carries over via the `or_insert_with(|| filter.clone())`
|
||||
seed — no special merge needed, but worth a comment. In `combine_pair`
|
||||
(intersect), decide how to combine two searches: welshman concatenates with a
|
||||
space. Concatenation is defensible ("must match both") but lossy and surprising;
|
||||
a cleaner rule for a typed model is to **merge two `SearchQuery` values** (union
|
||||
their terms and extensions) or, if we keep the field as a string at this layer,
|
||||
to concatenate with a space and document it. Recommend: concatenate with a space
|
||||
when both present and differ, matching welshman, and note the limitation.
|
||||
|
||||
**Local matching.** Extend `Filter::matches` to test `search` *after* the cheap
|
||||
scalar checks. Best-effort, case-insensitive. Two design choices to settle in
|
||||
planning:
|
||||
1. Whole-query substring (rust-nostr) vs. term-split AND/OR (welshman, fixed).
|
||||
A typed `SearchQuery` makes term-split natural: match the free-text terms
|
||||
(AND across terms reads as the intuitive "all words present"; document it),
|
||||
and treat `key:value` extensions as *unenforceable locally* — i.e. ignored by
|
||||
the local matcher, since we can't evaluate `sentiment:` or `domain:` without
|
||||
external data. This honesty matches the NIP.
|
||||
2. ASCII (`eq_ignore_ascii_case`) vs. Unicode lowercasing. ASCII is what
|
||||
rust-nostr ships and is allocation-free; Unicode `to_lowercase` is more
|
||||
correct for non-ASCII content but allocates. Given nostr's multilingual
|
||||
content, prefer Unicode `to_lowercase` for the local matcher — correctness
|
||||
over micro-optimization, consistent with our "clarity over cleverness" rule —
|
||||
and note the trade-off.
|
||||
|
||||
Also document, per rust-nostr's SDK, that local matching is a *fallback*:
|
||||
relay results should generally be trusted as-is rather than re-filtered.
|
||||
|
||||
**`SearchQuery` model (new `search.rs`).** A struct splitting a query into
|
||||
free-text `terms: Vec<String>` and `extensions: Vec<(String, String)>` (ordered;
|
||||
NIP-50 doesn't forbid repeats, and order can matter to relays). Parsing: split on
|
||||
whitespace, treat a token containing `:` (with a non-empty key before it) as an
|
||||
extension, everything else as a term. Provide:
|
||||
- `SearchQuery::parse(&str) -> SearchQuery` (total, never fails — unknown shapes
|
||||
fall back to terms).
|
||||
- `Display` / `to_string()` that re-renders to the wire string (terms first or
|
||||
preserve order; planning to decide).
|
||||
- Builder helpers: `term`, `extension`, plus typed convenience for the
|
||||
spec-defined extensions (`domain`, `language`, `sentiment`, `nsfw`,
|
||||
`include_spam`) — optional, decide scope in planning.
|
||||
- A bridge to `Filter`: `Filter::add_search` can accept `impl Into<String>` so
|
||||
both a raw string and `query.to_string()` work; optionally
|
||||
`Filter::search_query()` to parse the field back out.
|
||||
|
||||
Keep `sentiment`/`nsfw` values as strings (or small enums) — leaning toward
|
||||
strings to stay forward-compatible with relay-specific values, with named
|
||||
constructors for the common cases.
|
||||
|
||||
**Dependencies.** None new. Parsing is plain string handling; matching uses std.
|
||||
Avoid pulling in a real FTS engine — out of scope and against the
|
||||
minimal-dependency rule.
|
||||
|
||||
**Out of scope (defer / mention only).** Real relevance ranking; relay-side
|
||||
indexing; NIP-11 search-relay discovery (a networking concern); the `order`
|
||||
hint from applesauce; multi-field/tag matching beyond `content` (could mention
|
||||
`title`/`subject` as a possible extension but keep the matcher content-only for
|
||||
clarity).
|
||||
@@ -0,0 +1,250 @@
|
||||
use coracle_lib::events::{Event, EventContent};
|
||||
use coracle_lib::filters::{intersect_filters, union_filters, Filter};
|
||||
use coracle_lib::keys::SecretKey;
|
||||
use coracle_lib::search::SearchQuery;
|
||||
|
||||
fn fixed_secret() -> SecretKey {
|
||||
let bytes: [u8; 32] = [
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31, 32,
|
||||
];
|
||||
SecretKey::from_hex(&hex::encode(bytes)).unwrap()
|
||||
}
|
||||
|
||||
fn make_event(content: &str, created_at: u64) -> Event {
|
||||
let sk = fixed_secret();
|
||||
let hashed = EventContent::new()
|
||||
.content(content)
|
||||
.kind(1)
|
||||
.stamp(created_at)
|
||||
.own(sk.public_key())
|
||||
.hash();
|
||||
hashed.clone().sign(sk.sign(&hashed.id))
|
||||
}
|
||||
|
||||
/// Returns `true` when `a` and `b` differ by less than `1e-9`.
///
/// Used to compare floating-point scores without relying on exact equality.
fn approx(a: f64, b: f64) -> bool {
    (a - b).abs() < 1e-9
}
|
||||
|
||||
// --- SearchQuery parsing ---
|
||||
|
||||
/// A query of space-separated words parses into one term per word, in order.
#[test]
fn parse_splits_on_whitespace() {
    let q = SearchQuery::parse("best nostr apps");
    assert_eq!(q.terms, vec!["best", "nostr", "apps"]);
}
|
||||
|
||||
/// `key:value` tokens are kept verbatim as ordinary terms.
#[test]
fn parse_treats_extensions_as_terms() {
    // We don't interpret NIP-50 extensions; every token is just a term.
    let q = SearchQuery::parse("nostr domain:example.com language:en");
    assert_eq!(q.terms, vec!["nostr", "domain:example.com", "language:en"]);
}
|
||||
|
||||
/// Blank, empty, and freshly constructed queries are empty; a query with a
/// word is not.
#[test]
fn parse_is_empty_for_blank_input() {
    assert!(SearchQuery::parse(" ").is_empty());
    assert!(SearchQuery::parse("").is_empty());
    assert!(SearchQuery::new().is_empty());
    assert!(!SearchQuery::parse("nostr").is_empty());
}
|
||||
|
||||
/// Rendering a parsed query yields its terms joined by single spaces.
#[test]
fn display_joins_terms() {
    let q = SearchQuery::parse("nostr best apps");
    assert_eq!(q.to_string(), "nostr best apps");
}
|
||||
|
||||
/// Parsing the rendered form of a query gives back an equal query.
#[test]
fn display_round_trips_through_parse() {
    let q = SearchQuery::parse("nostr best apps language:en");
    assert_eq!(SearchQuery::parse(&q.to_string()), q);
}
|
||||
|
||||
// --- Scoring ---
|
||||
|
||||
/// Content containing every query term scores 1.0.
#[test]
fn score_full_match_is_one() {
    let q = SearchQuery::parse("nostr apps");
    assert!(approx(q.score("i love nostr and apps"), 1.0));
}
|
||||
|
||||
/// Content containing none of the query terms scores 0.0.
#[test]
fn score_no_match_is_zero() {
    let q = SearchQuery::parse("nostr");
    assert!(approx(q.score("no match here"), 0.0));
}
|
||||
|
||||
/// Matching one of two terms (a single occurrence) scores 0.5.
#[test]
fn score_partial_match_is_fraction() {
    let q = SearchQuery::parse("nostr apps");
    assert!(approx(q.score("only nostr here"), 0.5));
}
|
||||
|
||||
/// Term matching ignores case differences between query and content.
#[test]
fn score_is_case_insensitive() {
    let q = SearchQuery::parse("NOSTR");
    assert!(approx(q.score("the Nostr protocol"), 1.0));
}
|
||||
|
||||
/// A `key:value` token is scored as literal text: it matches only when the
/// content contains that exact token.
#[test]
fn score_extension_like_term_matches_as_text() {
    let q = SearchQuery::parse("language:en");
    assert!(approx(q.score("posted with language:en today"), 1.0));
    assert!(approx(q.score("no marker here"), 0.0));
}
|
||||
|
||||
/// An empty query imposes no constraint, so any content (even empty) scores 1.0.
#[test]
fn score_empty_query_is_one() {
    assert!(approx(SearchQuery::parse("").score("anything at all"), 1.0));
    assert!(approx(SearchQuery::new().score(""), 1.0));
}
|
||||
|
||||
/// Among partial matches, repeating a matched term raises the score, but
/// never to the level of a full match.
#[test]
fn score_frequency_bonus_orders_partial_matches() {
    let q = SearchQuery::parse("alpha beta");
    let once = q.score("alpha only");
    let many = q.score("alpha alpha alpha");
    assert!(approx(once, 0.5));
    assert!(many > once, "repeated term should score higher: {many} vs {once}");
    assert!(many < 1.0, "a partial match must stay below a full match");
}
|
||||
|
||||
/// Repeating a fully matched term does not push the score above 1.0.
#[test]
fn score_never_exceeds_one() {
    let q = SearchQuery::parse("nostr");
    // Heavy repetition of a full match is still capped at 1.0.
    assert!(approx(q.score("nostr nostr nostr nostr nostr"), 1.0));
}
|
||||
|
||||
/// For a range of empty, partial, repeated, and full matches the score stays
/// within the closed interval `[0.0, 1.0]`.
#[test]
fn score_is_bounded() {
    let q = SearchQuery::parse("alpha beta gamma");
    for content in ["", "alpha", "alpha beta", "alpha alpha gamma gamma", "alpha beta gamma"] {
        let s = q.score(content);
        assert!((0.0..=1.0).contains(&s), "score {s} out of range for {content:?}");
    }
}
|
||||
|
||||
// --- Filter integration ---
|
||||
|
||||
/// `add_search` stores the query on the filter and `clear_search` resets it
/// to `None`.
#[test]
fn add_and_clear_search() {
    let f = Filter::new().add_search("nostr");
    assert_eq!(f.search.as_deref(), Some("nostr"));
    assert_eq!(f.clear_search().search, None);
}
|
||||
|
||||
/// A filter with no search query scores every event 1.0.
#[test]
fn search_score_without_search_is_one() {
    let event = make_event("anything", 1);
    assert!(approx(Filter::new().search_score(&event), 1.0));
}
|
||||
|
||||
/// `search_score` is 1.0 when the event content contains the query term and
/// 0.0 when it does not.
#[test]
fn search_score_with_search() {
    let event = make_event("the nostr protocol", 1);
    assert!(approx(Filter::new().add_search("nostr").search_score(&event), 1.0));
    assert!(approx(Filter::new().add_search("missing").search_score(&event), 0.0));
}
|
||||
|
||||
/// `matches` does not consult the search field: an event can match a filter
/// structurally while its search score is 0.0.
#[test]
fn matches_ignores_search() {
    // Structural matching and search scoring are independent.
    let event = make_event("hello", 1);
    let filter = Filter::new().add_kind(1).add_search("not-in-content");
    assert!(filter.matches(&event), "matches must not consider the search field");
    assert!(approx(filter.search_score(&event), 0.0));
}
|
||||
|
||||
// --- Serialization ---
|
||||
|
||||
/// A filter with a search query serializes it under the `"search"` key and
/// deserializes back to an equal filter.
#[test]
fn search_round_trips_through_json() {
    let filter = Filter::new().add_kind(1).add_search("best nostr apps");
    let json = serde_json::to_string(&filter).unwrap();
    assert!(json.contains("\"search\":\"best nostr apps\""));
    let parsed: Filter = serde_json::from_str(&json).unwrap();
    assert_eq!(filter, parsed);
}
|
||||
|
||||
/// A filter without a search query emits no `search` key in its JSON.
#[test]
fn search_absent_when_none() {
    let json = serde_json::to_string(&Filter::new().add_kind(1)).unwrap();
    assert!(!json.contains("search"));
}
|
||||
|
||||
// --- Grouping and set algebra ---
|
||||
|
||||
/// Filters that differ only in their search query fall into different groups;
/// filters with the same search query share a group.
#[test]
fn group_distinguishes_searches() {
    let f1 = Filter::new().add_kind(1).add_search("alpha");
    let f2 = Filter::new().add_kind(1).add_search("beta");
    let f3 = Filter::new().add_kind(1).add_search("alpha");
    assert_ne!(f1.group(), f2.group());
    assert_eq!(f1.group(), f3.group());
}
|
||||
|
||||
/// Unioning two filters with different search queries leaves two filters.
#[test]
fn union_keeps_different_searches_separate() {
    let filters = vec![
        Filter::new().add_kind(1).add_search("alpha"),
        Filter::new().add_kind(1).add_search("beta"),
    ];
    assert_eq!(union_filters(&filters).len(), 2);
}
|
||||
|
||||
/// Unioning two filters with the same search query yields one filter that
/// keeps the query and contains both authors.
#[test]
fn union_merges_same_search() {
    let a = SecretKey::generate().public_key();
    let b = SecretKey::generate().public_key();
    let filters = vec![
        Filter::new().add_search("x").add_author(a),
        Filter::new().add_search("x").add_author(b),
    ];
    let result = union_filters(&filters);
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].search.as_deref(), Some("x"));
    let authors = result[0].authors.as_ref().unwrap();
    assert!(authors.contains(&a) && authors.contains(&b));
}
|
||||
|
||||
/// Intersecting two groups whose filters carry different search queries
/// yields two filters, one per original query.
#[test]
fn intersect_keeps_conflicting_searches_separate() {
    let groups = vec![
        vec![Filter::new().add_search("nostr")],
        vec![Filter::new().add_search("bitcoin")],
    ];
    let result = intersect_filters(&groups);
    assert_eq!(result.len(), 2);
    // Borrow the queries instead of cloning each `Option<String>`.
    let searches: std::collections::BTreeSet<_> =
        result.iter().map(|f| f.search.as_deref()).collect();
    assert!(searches.contains(&Some("nostr")));
    assert!(searches.contains(&Some("bitcoin")));
}
|
||||
|
||||
/// When only one side of an intersection has a search query, the result is a
/// single filter carrying that query together with the other side's author.
#[test]
fn intersect_combines_when_one_side_has_search() {
    let author = SecretKey::generate().public_key();
    let groups = vec![
        vec![Filter::new().add_author(author)],
        vec![Filter::new().add_search("nostr")],
    ];
    let result = intersect_filters(&groups);
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].search.as_deref(), Some("nostr"));
    assert!(result[0].authors.as_ref().unwrap().contains(&author));
}
|
||||
|
||||
/// Intersecting filters that share the same search query yields one filter
/// that keeps the query and contains the kinds from both sides.
#[test]
fn intersect_combines_equal_searches() {
    let groups = vec![
        vec![Filter::new().add_kind(1).add_search("x")],
        vec![Filter::new().add_kind(2).add_search("x")],
    ];
    let result = intersect_filters(&groups);
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].search.as_deref(), Some("x"));
    let kinds = result[0].kinds.as_ref().unwrap();
    assert!(kinds.contains(&1) && kinds.contains(&2));
}
|
||||
Reference in New Issue
Block a user