From 584835542869f14cf68055017035383470e6825f Mon Sep 17 00:00:00 2001 From: userAdityaa Date: Wed, 29 Apr 2026 23:45:30 +0530 Subject: [PATCH] fix: silent relay state drift when activity bus drops events --- backend/src/billing.rs | 35 +++++++++++++++++++++++++++++++++++ backend/src/command.rs | 10 +++++----- backend/src/infra.rs | 25 ++++++++++++++++++++----- backend/src/query.rs | 4 ++-- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/backend/src/billing.rs b/backend/src/billing.rs index 583717e..336500d 100644 --- a/backend/src/billing.rs +++ b/backend/src/billing.rs @@ -120,6 +120,10 @@ impl Billing { pub async fn start(self) { let mut rx = self.command.notify.subscribe(); + if let Err(error) = self.reconcile_relay_subscriptions("startup").await { + tracing::error!(error = %error, "failed to reconcile relay billing state on startup"); + } + loop { match rx.recv().await { Ok(activity) => { @@ -129,12 +133,39 @@ impl Billing { } Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => { tracing::warn!(missed = n, "billing lagged"); + + if let Err(error) = self.reconcile_relay_subscriptions("lagged").await { + tracing::error!(error = %error, "failed to reconcile relay billing state after lag"); + } } Err(tokio::sync::broadcast::error::RecvError::Closed) => break, } } } + async fn reconcile_relay_subscriptions(&self, source: &str) -> Result<()> { + let relays = self.query.list_relays().await?; + + if relays.is_empty() { + return Ok(()); + } + + tracing::info!(source, relay_count = relays.len(), "reconciling relay billing state"); + + for relay in relays { + if let Err(error) = self.sync_relay_subscription_for_relay(&relay).await { + tracing::error!( + source, + relay = %relay.id, + error = %error, + "failed to reconcile relay billing state" + ); + } + } + + Ok(()) + } + async fn handle_activity(&self, activity: &Activity) -> Result<()> { let needs_billing_sync = matches!( activity.activity_type.as_str(), @@ -158,6 +189,10 @@ impl Billing { 
return Ok(()); }; + self.sync_relay_subscription_for_relay(&relay).await + } + + async fn sync_relay_subscription_for_relay(&self, relay: &Relay) -> Result<()> { let Some(tenant) = self.query.get_tenant(&relay.tenant).await? else { return Ok(()); }; diff --git a/backend/src/command.rs b/backend/src/command.rs index cd8a440..fb6c682 100644 --- a/backend/src/command.rs +++ b/backend/src/command.rs @@ -113,12 +113,12 @@ impl Command { sqlx::query( "INSERT INTO relay ( - id, tenant, schema, subdomain, plan, status, sync_error, + id, tenant, schema, subdomain, plan, status, synced, sync_error, info_name, info_icon, info_description, policy_public_join, policy_strip_signatures, groups_enabled, management_enabled, blossom_enabled, livekit_enabled, push_enabled - ) VALUES (?, ?, ?, ?, ?, 'active', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ) VALUES (?, ?, ?, ?, ?, 'active', 0, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", ) .bind(&relay.id) .bind(&relay.tenant) @@ -151,7 +151,7 @@ impl Command { sqlx::query( "UPDATE relay - SET tenant = ?, schema = ?, subdomain = ?, plan = ?, status = ?, sync_error = ?, + SET tenant = ?, schema = ?, subdomain = ?, plan = ?, status = ?, sync_error = ?, synced = 0, info_name = ?, info_icon = ?, info_description = ?, policy_public_join = ?, policy_strip_signatures = ?, groups_enabled = ?, management_enabled = ?, blossom_enabled = ?, @@ -203,7 +203,7 @@ impl Command { ) -> Result<()> { let mut tx = self.pool.begin().await?; - sqlx::query("UPDATE relay SET status = ? WHERE id = ?") + sqlx::query("UPDATE relay SET status = ?, synced = 0 WHERE id = ?") .bind(status) .bind(relay_id) .execute(&mut *tx) @@ -224,7 +224,7 @@ impl Command { pub async fn fail_relay_sync(&self, relay: &Relay, sync_error: String) -> Result<()> { let mut tx = self.pool.begin().await?; - sqlx::query("UPDATE relay SET sync_error = ? WHERE id = ?") + sqlx::query("UPDATE relay SET synced = 0, sync_error = ? 
WHERE id = ?") .bind(&sync_error) .bind(&relay.id) .execute(&mut *tx) diff --git a/backend/src/infra.rs b/backend/src/infra.rs index 6ac956d..6ef88c2 100644 --- a/backend/src/infra.rs +++ b/backend/src/infra.rs @@ -53,8 +53,8 @@ impl Infra { pub async fn start(self) { let mut rx = self.command.notify.subscribe(); - if let Err(e) = self.schedule_startup_retries().await { - tracing::error!(error = %e, "failed to schedule relay sync retries on startup"); + if let Err(error) = self.reconcile_relay_state("startup").await { + tracing::error!(error = %error, "failed to reconcile relay state on startup"); } loop { @@ -66,6 +66,10 @@ impl Infra { } Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => { tracing::warn!(missed = n, "infra lagged"); + + if let Err(error) = self.reconcile_relay_state("lagged").await { + tracing::error!(error = %error, "failed to reconcile relay state after lag"); + } } Err(tokio::sync::broadcast::error::RecvError::Closed) => break, } @@ -95,11 +99,22 @@ impl Infra { Ok(()) } - async fn schedule_startup_retries(&self) -> Result<()> { - let relays = self.query.list_relays_with_sync_error().await?; + async fn reconcile_relay_state(&self, source: &str) -> Result<()> { + let relays = self.query.list_relays_pending_sync().await?; + + if relays.is_empty() { + return Ok(()); + } + + tracing::info!(source, relay_count = relays.len(), "reconciling pending relay state"); for relay in relays { - self.schedule_relay_sync_retry(&relay.id, "startup").await?; + if relay.sync_error.trim().is_empty() { + let is_new = relay.synced == 0; + self.sync_and_report(&relay, is_new).await; + } else { + self.schedule_relay_sync_retry(&relay.id, source).await?; + } } Ok(()) diff --git a/backend/src/query.rs b/backend/src/query.rs index 85011fb..d231055 100644 --- a/backend/src/query.rs +++ b/backend/src/query.rs @@ -94,7 +94,7 @@ impl Query { Ok(rows) } - pub async fn list_relays_with_sync_error(&self) -> Result<Vec<Relay>> { + pub async fn list_relays_pending_sync(&self) -> 
Result<Vec<Relay>> { let rows = sqlx::query_as::<_, Relay>( "SELECT id, tenant, schema, subdomain, plan, stripe_subscription_item_id, status, sync_error, @@ -103,7 +103,7 @@ impl Query { groups_enabled, management_enabled, blossom_enabled, livekit_enabled, push_enabled, synced FROM relay - WHERE TRIM(sync_error) != '' + WHERE synced = 0 OR TRIM(sync_error) != '' ORDER BY id", ) .fetch_all(&self.pool) -- 2.52.0