fix: silent relay state drift when activity bus drops events #53
@@ -120,6 +120,10 @@ impl Billing {
|
||||
pub async fn start(self) {
|
||||
let mut rx = self.command.notify.subscribe();
|
||||
|
||||
if let Err(error) = self.reconcile_relay_subscriptions("startup").await {
|
||||
tracing::error!(error = %error, "failed to reconcile relay billing state on startup");
|
||||
}
|
||||
|
||||
loop {
|
||||
match rx.recv().await {
|
||||
Ok(activity) => {
|
||||
@@ -129,12 +133,39 @@ impl Billing {
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
|
||||
tracing::warn!(missed = n, "billing lagged");
|
||||
|
||||
if let Err(error) = self.reconcile_relay_subscriptions("lagged").await {
|
||||
tracing::error!(error = %error, "failed to reconcile relay billing state after lag");
|
||||
}
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn reconcile_relay_subscriptions(&self, source: &str) -> Result<()> {
|
||||
let relays = self.query.list_relays().await?;
|
||||
|
||||
if relays.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(source, relay_count = relays.len(), "reconciling relay billing state");
|
||||
|
||||
for relay in relays {
|
||||
if let Err(error) = self.sync_relay_subscription_for_relay(&relay).await {
|
||||
tracing::error!(
|
||||
source,
|
||||
relay = %relay.id,
|
||||
error = %error,
|
||||
"failed to reconcile relay billing state"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_activity(&self, activity: &Activity) -> Result<()> {
|
||||
let needs_billing_sync = matches!(
|
||||
activity.activity_type.as_str(),
|
||||
@@ -158,6 +189,10 @@ impl Billing {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
self.sync_relay_subscription_for_relay(&relay).await
|
||||
}
|
||||
|
||||
async fn sync_relay_subscription_for_relay(&self, relay: &Relay) -> Result<()> {
|
||||
let Some(tenant) = self.query.get_tenant(&relay.tenant).await? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
@@ -113,12 +113,12 @@ impl Command {
|
||||
|
||||
sqlx::query(
|
||||
"INSERT INTO relay (
|
||||
id, tenant, schema, subdomain, plan, status, sync_error,
|
||||
id, tenant, schema, subdomain, plan, status, synced, sync_error,
|
||||
info_name, info_icon, info_description,
|
||||
policy_public_join, policy_strip_signatures,
|
||||
groups_enabled, management_enabled, blossom_enabled,
|
||||
livekit_enabled, push_enabled
|
||||
) VALUES (?, ?, ?, ?, ?, 'active', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
) VALUES (?, ?, ?, ?, ?, 'active', 0, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
)
|
||||
.bind(&relay.id)
|
||||
.bind(&relay.tenant)
|
||||
@@ -151,7 +151,7 @@ impl Command {
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE relay
|
||||
SET tenant = ?, schema = ?, subdomain = ?, plan = ?, status = ?, sync_error = ?,
|
||||
SET tenant = ?, schema = ?, subdomain = ?, plan = ?, status = ?, sync_error = ?, synced = 0,
|
||||
info_name = ?, info_icon = ?, info_description = ?,
|
||||
policy_public_join = ?, policy_strip_signatures = ?,
|
||||
groups_enabled = ?, management_enabled = ?, blossom_enabled = ?,
|
||||
@@ -203,7 +203,7 @@ impl Command {
|
||||
) -> Result<()> {
|
||||
let mut tx = self.pool.begin().await?;
|
||||
|
||||
sqlx::query("UPDATE relay SET status = ? WHERE id = ?")
|
||||
sqlx::query("UPDATE relay SET status = ?, synced = 0 WHERE id = ?")
|
||||
.bind(status)
|
||||
.bind(relay_id)
|
||||
.execute(&mut *tx)
|
||||
@@ -224,7 +224,7 @@ impl Command {
|
||||
pub async fn fail_relay_sync(&self, relay: &Relay, sync_error: String) -> Result<()> {
|
||||
let mut tx = self.pool.begin().await?;
|
||||
|
||||
sqlx::query("UPDATE relay SET sync_error = ? WHERE id = ?")
|
||||
sqlx::query("UPDATE relay SET synced = 0, sync_error = ? WHERE id = ?")
|
||||
.bind(&sync_error)
|
||||
.bind(&relay.id)
|
||||
.execute(&mut *tx)
|
||||
|
||||
+20
-5
@@ -53,8 +53,8 @@ impl Infra {
|
||||
pub async fn start(self) {
|
||||
let mut rx = self.command.notify.subscribe();
|
||||
|
||||
if let Err(e) = self.schedule_startup_retries().await {
|
||||
tracing::error!(error = %e, "failed to schedule relay sync retries on startup");
|
||||
if let Err(error) = self.reconcile_relay_state("startup").await {
|
||||
tracing::error!(error = %error, "failed to reconcile relay state on startup");
|
||||
}
|
||||
|
||||
loop {
|
||||
@@ -66,6 +66,10 @@ impl Infra {
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
|
||||
tracing::warn!(missed = n, "infra lagged");
|
||||
|
||||
if let Err(error) = self.reconcile_relay_state("lagged").await {
|
||||
tracing::error!(error = %error, "failed to reconcile relay state after lag");
|
||||
}
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
|
||||
}
|
||||
@@ -95,11 +99,22 @@ impl Infra {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn schedule_startup_retries(&self) -> Result<()> {
|
||||
let relays = self.query.list_relays_with_sync_error().await?;
|
||||
async fn reconcile_relay_state(&self, source: &str) -> Result<()> {
|
||||
let relays = self.query.list_relays_pending_sync().await?;
|
||||
|
||||
if relays.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::info!(source, relay_count = relays.len(), "reconciling pending relay state");
|
||||
|
||||
for relay in relays {
|
||||
self.schedule_relay_sync_retry(&relay.id, "startup").await?;
|
||||
if relay.sync_error.trim().is_empty() {
|
||||
let is_new = relay.synced == 0;
|
||||
self.sync_and_report(&relay, is_new).await;
|
||||
} else {
|
||||
self.schedule_relay_sync_retry(&relay.id, source).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -94,7 +94,7 @@ impl Query {
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
pub async fn list_relays_with_sync_error(&self) -> Result<Vec<Relay>> {
|
||||
pub async fn list_relays_pending_sync(&self) -> Result<Vec<Relay>> {
|
||||
let rows = sqlx::query_as::<_, Relay>(
|
||||
"SELECT id, tenant, schema, subdomain, plan, stripe_subscription_item_id,
|
||||
status, sync_error,
|
||||
@@ -103,7 +103,7 @@ impl Query {
|
||||
groups_enabled, management_enabled, blossom_enabled,
|
||||
livekit_enabled, push_enabled, synced
|
||||
FROM relay
|
||||
WHERE TRIM(sync_error) != ''
|
||||
WHERE synced = 0 OR TRIM(sync_error) != ''
|
||||
ORDER BY id",
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
|
||||
Reference in New Issue
Block a user