fix(metadata): add case-insensitive HTML title fallback parsing for invite links #248

Merged
hodlbod merged 4 commits from Khushvendra/flotilla:issue/131-invite-link-preview into dev 2026-05-04 21:02:57 +00:00
2 changed files with 117 additions and 513 deletions
Showing only changes of commit e380e4e3d6 - Show all commits
+114 -513
View File
@@ -5,7 +5,7 @@ import {fileURLToPath} from "node:url"
import {serve} from "@hono/node-server"
import {serveStatic} from "@hono/node-server/serve-static"
import {fetchRelay} from "@welshman/app"
import {displayRelayUrl, isRelayUrl, normalizeRelayUrl} from "@welshman/util"
import {displayRelayUrl, normalizeRelayUrl} from "@welshman/util"
import {load} from "cheerio"
import {Hono} from "hono"
@@ -15,592 +15,193 @@ const __dirname = path.dirname(__filename)
const BUILD_DIR = path.join(__dirname, "build")
const INDEX_PATH = path.join(BUILD_DIR, "index.html")
/**
 * Parse a base-10 integer from an optional string setting.
 *
 * @param value - Raw setting, typically an environment variable; may be undefined or empty.
 * @param fallback - Value returned when parsing does not yield a strictly positive integer.
 * @returns The parsed integer when it is finite and greater than zero, otherwise `fallback`.
 */
const readPositiveInt = (value, fallback) => {
  const candidate = Number.parseInt(value || "", 10)

  // NaN (unparseable input), zero and negatives are all rejected.
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return fallback
  }

  return candidate
}
const PORT = readPositiveInt(process.env.PORT, 3000)
const PORT = parseInt(process.env.PORT || "", 10) || 3000
const HOST = process.env.HOST || "0.0.0.0"
// Total time budget, in milliseconds, for resolving one relay's metadata
// (overridable via INVITE_PREVIEW_TIMEOUT_MS; defaults to 1200).
const REQUEST_TIMEOUT_MS = readPositiveInt(process.env.INVITE_PREVIEW_TIMEOUT_MS, 1200)
// Maximum number of entries kept in the relay metadata cache
// (overridable via INVITE_PREVIEW_CACHE_MAX; defaults to 500).
const CACHE_MAX_ITEMS = readPositiveInt(process.env.INVITE_PREVIEW_CACHE_MAX, 500)
// Cache lifetime for lookups that produced at least one metadata field
// (overridable via INVITE_PREVIEW_CACHE_TTL_MS; defaults to 15 minutes).
const POSITIVE_CACHE_TTL_MS = readPositiveInt(
process.env.INVITE_PREVIEW_CACHE_TTL_MS,
15 * 60 * 1000,
)
// Cache lifetime for lookups that produced no metadata at all
// (overridable via INVITE_PREVIEW_NEGATIVE_CACHE_TTL_MS; defaults to 2 minutes).
const NEGATIVE_CACHE_TTL_MS = readPositiveInt(
process.env.INVITE_PREVIEW_NEGATIVE_CACHE_TTL_MS,
2 * 60 * 1000,
)
// HTTP methods the server accepts; anything else is answered with 405.
const ALLOWED_METHODS = new Set(["GET", "HEAD"])
// Site-relative path of the preview image used when the relay supplies no icon.
const DEFAULT_IMAGE_PATH = "/maskable-icon-512x512.png"
// Headers attached to every HTML response built by this server.
const HTML_HEADERS = {"Content-Type": "text/html; charset=utf-8"}
const truncate = (value, limit) => {
if (value.length <= limit) {
return value
}
if (limit <= 3) {
return value.slice(0, limit)
}
return `${value.slice(0, limit - 3)}...`
let TEMPLATE_HTML = ""
try {
TEMPLATE_HTML = await fs.readFile(INDEX_PATH, "utf8")
} catch (error) {
console.error(`Unable to read ${INDEX_PATH}. Run "pnpm run build" first.`)
process.exit(1)
}
const sanitizeText = (value, limit) => {
if (typeof value !== "string") {
const TEMPLATE_DOCUMENT = load(TEMPLATE_HTML)
const DEFAULT_PLATFORM_NAME =
process.env.VITE_PLATFORM_NAME ||
TEMPLATE_DOCUMENT('meta[property="og:title"]').attr("content") ||
"Flotilla"
const DEFAULT_PLATFORM_DESCRIPTION =
process.env.VITE_PLATFORM_DESCRIPTION ||
TEMPLATE_DOCUMENT('meta[name="description"]').attr("content") ||
"Flotilla is nostr - for communities."
Khushvendra marked this conversation as resolved Outdated
Outdated
Review

These will always be defined (because .env is checked into version control), so there is no need for a fallback.

These will always be defined (because .env is checked into version control), so there is no need for a fallback.
// Match client-side decode logic
const decodeRelay = url => {
try {
return normalizeRelayUrl(decodeURIComponent(url))
} catch {
return undefined
}
const compact = value.replace(/\s+/g, " ").trim()
if (!compact) {
return undefined
}
return truncate(compact, limit)
}
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param value - Any value.
 * @returns `true` for plain objects and other non-array objects, `false` otherwise.
 */
const isRecord = value => {
  if (value === null || typeof value !== "object") {
    return false
  }

  return !Array.isArray(value)
}
/**
 * Tell whether a metadata object carries at least one own enumerable field.
 *
 * @param metadata - Metadata object to inspect.
 * @returns `true` when the object has one or more own enumerable keys.
 */
const hasMetadata = metadata => {
  const fieldNames = Object.keys(metadata)

  return fieldNames.length > 0
}
/**
 * Extract the first entry of a comma-separated header value.
 *
 * @param value - Raw header value; anything that is not a string yields `undefined`.
 * @returns The first comma-separated entry passed through `sanitizeText` with a
 *   256-character limit, or `undefined` for non-string input.
 */
const firstHeaderValue = value =>
  typeof value === "string" ? sanitizeText(value.split(",")[0], 256) : undefined
const requestUrlFromContext = context => {
const requestUrl = new URL(context.req.url)
const forwardedProto = firstHeaderValue(context.req.header("x-forwarded-proto"))
const forwardedHost = firstHeaderValue(context.req.header("x-forwarded-host"))
const forwardedProto = context.req.header("x-forwarded-proto")?.split(",")[0]?.trim()
const forwardedHost = context.req.header("x-forwarded-host")?.split(",")[0]?.trim()
try {
if (forwardedProto === "http" || forwardedProto === "https") {
requestUrl.protocol = `${forwardedProto}:`
}
if (forwardedProto === "http" || forwardedProto === "https") {
requestUrl.protocol = `${forwardedProto}:`
}
if (forwardedHost) {
requestUrl.host = forwardedHost
}
} catch {
return requestUrl
if (forwardedHost) {
requestUrl.host = forwardedHost
}
return requestUrl
}
const absoluteUrlFromRequest = (requestUrl, value) => {
try {
return new URL(value, requestUrl.origin).toString()
} catch {
return value
}
}
const resolveMetadata = async requestUrl => {
const pathname = requestUrl.pathname
let relayParam = undefined
const normalizeImageUrl = (value, baseUrl) => {
if (typeof value !== "string") {
return undefined
// Match /join?r=...
if (pathname === "/join" || pathname === "/join/") {
relayParam = requestUrl.searchParams.get("r")
}
// Match /spaces/:relay/...
else if (pathname.startsWith("/spaces/")) {
const parts = pathname.split("/").filter(Boolean)
if (parts.length >= 2) {
relayParam = decodeRelay(parts[1])
}
}
Khushvendra marked this conversation as resolved Outdated
Outdated
Review

This routing logic is still incomplete and is pretty brittle. We should do something like this:

const routes = [
  [/^\/join\/?$/, getMetadataForInvite],
  [/^\/spaces\/(RELAY_REGEX)\/?$/, getMetadataForSpace],
  [/^\/spaces\/(RELAY_REGEX)\/(ROOM_REGEX)\/?$/, getMetadataForRoom],
  [/^\/spaces\/(RELAY_REGEX)\/(ROOM_REGEX)\/calendar\/?$/, getMetadataForCalendar],
  [/^\/spaces\/(RELAY_REGEX)\/(ROOM_REGEX)\/calendar\/(ADDRESS_REGEX)\/?$/, getMetadataForCalendarEvent],
]
const getMetadataForRoute = (url: URL) => {
  for (const [regex, getMetadata] of routes) {
    const match = url.pathname.match(regex)
    
    if (match) {
      return getMetadata(url, match)
    }
  }
}
const meta = getMetadataForRoute(requestUrl)

This way it's clear which function is responsible for which route. Common utilities can be factored out (e.g. relay fetching, relay title generation, etc).

This routing logic is still incomplete and is pretty brittle. We should do something like this: ```typescript const routes = [ [/^\/join\/?$/, getMetadataForInvite], [/^\/spaces\/(RELAY_REGEX)\/?$/, getMetadataForSpace], [/^\/spaces\/(RELAY_REGEX)\/(ROOM_REGEX)\/?$/, getMetadataForRoom], [/^\/spaces\/(RELAY_REGEX)\/(ROOM_REGEX)\/calendar\/?$/, getMetadataForCalendar], [/^\/spaces\/(RELAY_REGEX)\/(ROOM_REGEX)\/calendar\/(ADDRESS_REGEX)\/?$/, getMetadataForCalendarEvent], ] const getMetadataForRoute = (url: URL) => { for (const [regex, getMetadata] of routes) { const match = url.pathname.match(regex) if (match) { return getMetadata(url, match) } } } const meta = getMetadataForRoute(requestUrl) ``` This way it's clear which function is responsible for which route. Common utilities can be factored out (e.g. relay fetching, relay title generation, etc).
const trimmed = value.trim()
if (!trimmed) {
if (!relayParam) {
return undefined
}
try {
const imageUrl = new URL(trimmed, baseUrl)
// Note: fetchRelay from @welshman/app handles the ws->http conversion and caching
const relayMetadata = await fetchRelay(normalizeRelayUrl(relayParam))
if (!["http:", "https:"].includes(imageUrl.protocol)) {
if (!relayMetadata) {
return undefined
}
return imageUrl.toString()
} catch {
return undefined
}
}
const relayDisplay = displayRelayUrl(relayParam)
const spaceName = relayMetadata.name
const relayDescription = relayMetadata.description
const normalizeRelayInput = value => {
const trimmed = value.trim()
const title = spaceName
? `Invite to ${spaceName} on ${DEFAULT_PLATFORM_NAME}`
: `Invite to a Space on ${DEFAULT_PLATFORM_NAME}`
if (!trimmed) {
return undefined
}
let relayInput = trimmed
try {
const parsed = new URL(trimmed)
if (parsed.protocol === "http:") {
parsed.protocol = "ws:"
relayInput = parsed.toString()
}
if (parsed.protocol === "https:") {
parsed.protocol = "wss:"
relayInput = parsed.toString()
}
} catch {
relayInput = trimmed
}
if (!isRelayUrl(relayInput)) {
return undefined
}
try {
const relayUrl = new URL(normalizeRelayUrl(relayInput))
relayUrl.hash = ""
relayUrl.search = ""
return relayUrl.toString()
} catch {
return undefined
}
}
const relayToInfoUrl = relayUrl => {
try {
const relayHttpUrl = new URL(relayUrl)
if (relayHttpUrl.protocol === "ws:") {
relayHttpUrl.protocol = "http:"
} else if (relayHttpUrl.protocol === "wss:") {
relayHttpUrl.protocol = "https:"
const parts = []
if (spaceName) {
parts.push(`You are invited to join ${spaceName} on ${DEFAULT_PLATFORM_NAME}.`)
} else {
return undefined
parts.push(`You are invited to join a space on ${DEFAULT_PLATFORM_NAME}.`)
}
relayHttpUrl.hash = ""
relayHttpUrl.search = ""
if (relayDisplay) parts.push(`Relay: ${relayDisplay}.`)
if (relayDescription) parts.push(relayDescription)
else parts.push(DEFAULT_PLATFORM_DESCRIPTION)
return relayHttpUrl.toString()
} catch {
const description = parts.join(" ")
const image =
relayMetadata.icon ||
relayMetadata.picture ||
relayMetadata.image ||
new URL("/maskable-icon-512x512.png", requestUrl.origin).toString()
return {
title,
description,
image,
url: requestUrl.toString(),
site: requestUrl.origin,
}
} catch (err) {
return undefined
}
Khushvendra marked this conversation as resolved Outdated
Outdated
Review

We should not be swallowing errors; add a console.error statement here.

We should not be swallowing errors; add a console.error statement here.
}
const withTimeout = async (promise, timeoutMs) =>
new Promise(resolve => {
if (timeoutMs <= 0) {
resolve(undefined)
return
}
const injectMeta = metadata => {
const $ = load(TEMPLATE_HTML)
const timeout = setTimeout(() => resolve(undefined), timeoutMs)
promise
.then(resolve)
.catch(() => resolve(undefined))
.finally(() => clearTimeout(timeout))
})
const findMeta = ($, key) => {
const normalizedKey = key.toLowerCase()
return $("meta")
.filter((_, element) => {
const meta = $(element)
const name = meta.attr("name")?.toLowerCase()
const property = meta.attr("property")?.toLowerCase()
return name === normalizedKey || property === normalizedKey
})
.first()
}
/**
 * Read the `content` attribute of the meta tag identified by `key`.
 *
 * @param $ - Cheerio document to search.
 * @param key - Meta `name` or `property` to look up.
 * @returns Whatever `attr("content")` yields for the tag located by `findMeta`.
 */
const readMetaContent = ($, key) => {
  const tag = findMeta($, key)

  return tag.attr("content")
}
const readIconHref = $ => {
let href
$("link[rel]").each((_, element) => {
if (href) {
return
}
const link = $(element)
const rel = link.attr("rel")?.toLowerCase() || ""
if (rel.includes("icon")) {
href = link.attr("href")
}
})
return href
}
const findLinkRel = ($, rel) =>
$("link[rel]")
.filter((_, element) => ($(element).attr("rel")?.toLowerCase() || "").includes(rel))
.first()
/**
 * Derive relay metadata from an HTML page.
 *
 * The name comes from the first `<title>`, the description from the
 * "description" meta tag, and the icon from `og:image`, then `twitter:image`,
 * then the first icon `<link>`.
 *
 * @param html - HTML source of the page.
 * @param baseUrl - Base URL passed to `normalizeImageUrl` for the icon.
 * @returns An object containing only those of `name`, `description` and `icon`
 *   that came out truthy.
 */
const extractHtmlMetadata = (html, baseUrl) => {
  const $ = load(html)

  const name = sanitizeText($("title").first().text(), 80)
  const description = sanitizeText(readMetaContent($, "description") || "", 180)
  const iconCandidate =
    readMetaContent($, "og:image") || readMetaContent($, "twitter:image") || readIconHref($) || ""
  const icon = normalizeImageUrl(iconCandidate, baseUrl)

  const fields = [
    ["name", name],
    ["description", description],
    ["icon", icon],
  ]

  // Fields without a usable value are left out entirely rather than set to undefined.
  return Object.fromEntries(fields.filter(([, fieldValue]) => fieldValue))
}
const normalizeRelayMetadata = (relayMetadata, relayUrl) => {
if (!isRecord(relayMetadata)) {
return {}
if (metadata.title) {
$("title").text(metadata.title)
$('meta[property="og:title"]').attr("content", metadata.title)
$('meta[name="twitter:title"]').attr("content", metadata.title)
}
const infoUrl = relayToInfoUrl(relayUrl) || relayUrl
const name = sanitizeText(relayMetadata.name || relayMetadata.title, 80)
const description = sanitizeText(relayMetadata.description, 180)
const icon = normalizeImageUrl(
relayMetadata.icon || relayMetadata.picture || relayMetadata.image,
infoUrl,
)
return {
...(name ? {name} : {}),
...(description ? {description} : {}),
...(icon ? {icon} : {}),
}
}
const fetchHtmlRelayMetadata = async (relayUrl, timeoutMs) => {
const infoUrl = relayToInfoUrl(relayUrl)
if (!infoUrl || timeoutMs <= 0) {
return {}
if (metadata.description) {
$('meta[name="description"]').attr("content", metadata.description)
$('meta[property="og:description"]').attr("content", metadata.description)
$('meta[name="twitter:description"]').attr("content", metadata.description)
}
const controller = new AbortController()
const timeout = setTimeout(() => controller.abort(), timeoutMs)
try {
const response = await fetch(infoUrl, {
headers: {
Accept: "text/html, application/xhtml+xml, */*;q=0.1",
},
redirect: "follow",
signal: controller.signal,
})
if (!response.ok) {
return {}
}
return extractHtmlMetadata(await response.text(), infoUrl)
} catch {
return {}
} finally {
clearTimeout(timeout)
}
}
// Relay URL -> {expiresAt, value}. Insertion order doubles as recency order:
// reads re-insert the entry and writes evict the first (oldest) key when full.
const cacheByRelay = new Map()
// Relay URL -> promise of a lookup that is currently running, so concurrent
// requests for the same relay share one lookup.
const inFlightByRelay = new Map()
const getCachedRelayData = relayUrl => {
const cached = cacheByRelay.get(relayUrl)
if (cached === undefined) {
return undefined
if (metadata.image) {
$('meta[property="og:image"]').attr("content", metadata.image)
$('meta[name="twitter:image"]').attr("content", metadata.image)
}
if (cached.expiresAt <= Date.now()) {
cacheByRelay.delete(relayUrl)
return undefined
if (metadata.url) {
$('meta[property="og:url"]').attr("content", metadata.url)
$('meta[name="twitter:site"]').attr("content", metadata.site)
$('meta[name="twitter:url"]').attr("content", metadata.url)
$('link[rel="canonical"]').attr("href", metadata.url)
}
cacheByRelay.delete(relayUrl)
cacheByRelay.set(relayUrl, cached)
return cached.value
}
/**
 * Store relay metadata in the bounded cache.
 *
 * @param relayUrl - Cache key (normalized relay URL).
 * @param value - Metadata object to cache.
 * @param ttlMs - Lifetime of the entry in milliseconds, counted from now.
 */
const setCachedRelayData = (relayUrl, value, ttlMs) => {
  // Remove any previous entry for this key first. Otherwise overwriting a key
  // while the cache is full would evict an unrelated entry, and Map.set() on an
  // existing key keeps its old position, so the refreshed entry would still be
  // treated as the oldest one.
  cacheByRelay.delete(relayUrl)

  if (cacheByRelay.size >= CACHE_MAX_ITEMS) {
    // Maps iterate in insertion order, so the first key is the least recently used.
    const oldestKey = cacheByRelay.keys().next().value

    if (oldestKey !== undefined) {
      cacheByRelay.delete(oldestKey)
    }
  }

  cacheByRelay.set(relayUrl, {
    expiresAt: Date.now() + ttlMs,
    value,
  })
}
/**
 * Resolve display metadata for a relay, using the cache and sharing lookups
 * that are already running.
 *
 * @param relayUrl - Relay URL used as cache key and passed to the fetchers.
 * @returns A promise for the metadata object; it may be empty when neither
 *   source produced anything.
 */
const fetchRelayMetadata = async relayUrl => {
// Fast path: a cached result (possibly an empty one) that has not expired.
const cached = getCachedRelayData(relayUrl)
if (cached !== undefined) {
return cached
}
// A lookup for this relay is already running: reuse its promise.
const inFlight = inFlightByRelay.get(relayUrl)
if (inFlight !== undefined) {
return inFlight
}
const loader = (async () => {
// Both attempts below share one time budget of REQUEST_TIMEOUT_MS.
const deadline = Date.now() + REQUEST_TIMEOUT_MS
const remainingTimeout = () => Math.max(0, deadline - Date.now())
const relay = await withTimeout(fetchRelay(relayUrl), remainingTimeout())
let metadata = normalizeRelayMetadata(relay, relayUrl)
// Fall back to the relay's HTML page only when the first attempt gave no fields.
if (!hasMetadata(metadata)) {
metadata = await fetchHtmlRelayMetadata(relayUrl, remainingTimeout())
}
// Empty results are cached too, with the negative TTL instead of the positive one.
setCachedRelayData(
relayUrl,
metadata,
hasMetadata(metadata) ? POSITIVE_CACHE_TTL_MS : NEGATIVE_CACHE_TTL_MS,
)
return metadata
})()
// Registered only after the loader has started; the loader runs synchronously
// up to its first await before this line executes.
inFlightByRelay.set(relayUrl, loader)
try {
return await loader
} finally {
// Always unregister, whether the loader resolved or rejected.
inFlightByRelay.delete(relayUrl)
}
}
/**
 * Tell whether a pathname is the invite route, with or without a trailing slash.
 *
 * @param pathname - URL pathname to test.
 * @returns `true` only for exactly "/join" or "/join/".
 */
const isJoinInvitePath = pathname => ["/join", "/join/"].includes(pathname)
/**
 * Extract invite parameters from the request URL's query string.
 *
 * @param requestUrl - URL of the current request; `r` carries the relay and `c` the claim.
 * @returns `{relayUrl, claim}` when `r` normalizes to a relay URL, otherwise
 *   `undefined`. `claim` is the sanitized `c` parameter, or "" when absent.
 */
const parseInvite = requestUrl => {
  const {searchParams} = requestUrl
  const relayUrl = normalizeRelayInput(searchParams.get("r") || "")

  if (relayUrl) {
    const claim = sanitizeText(searchParams.get("c") || "", 256) || ""

    return {relayUrl, claim}
  }

  return undefined
}
/**
 * Read the built `index.html` from disk.
 *
 * @returns A promise for the file contents as a UTF-8 string.
 * @throws Rethrows the read error after logging a hint to build the app first.
 */
const loadIndexTemplate = async () => {
  let contents

  try {
    contents = await fs.readFile(INDEX_PATH, "utf8")
  } catch (error) {
    console.error(`Unable to read ${INDEX_PATH}. Run \"pnpm run build\" first.`)
    throw error
  }

  return contents
}
// The built index.html, read once at module load (top-level await); a failed
// read rejects here.
const INDEX_TEMPLATE = await loadIndexTemplate()
// Parsed copy of the template, used below only to read its default meta tags.
const INDEX_DOCUMENT = load(INDEX_TEMPLATE)
// Platform name used in invite titles and descriptions. Precedence: environment
// variable, then the template's og:title, then twitter:title, then "Flotilla".
const DEFAULT_PLATFORM_NAME =
sanitizeText(process.env.VITE_PLATFORM_NAME, 80) ||
sanitizeText(readMetaContent(INDEX_DOCUMENT, "og:title"), 80) ||
sanitizeText(readMetaContent(INDEX_DOCUMENT, "twitter:title"), 80) ||
"Flotilla"
// Description used when the relay supplies none. Precedence: environment
// variable, then the template's description meta tag, then a hard-coded default.
const DEFAULT_PLATFORM_DESCRIPTION =
sanitizeText(process.env.VITE_PLATFORM_DESCRIPTION, 180) ||
sanitizeText(readMetaContent(INDEX_DOCUMENT, "description"), 180) ||
"Flotilla is nostr - for communities."
/**
 * Compose the preview description for an invite link.
 *
 * @param options.spaceName - Name of the space, if known.
 * @param options.relayDisplay - Human-readable relay address, if any.
 * @param options.relayDescription - Description supplied by the relay, if any.
 * @returns The sentences joined by spaces and passed through `sanitizeText`
 *   with a 220-character limit, or the platform description when that is empty.
 */
const buildInviteDescription = ({spaceName, relayDisplay, relayDescription}) => {
  const invitation = spaceName
    ? `You are invited to join ${spaceName} on ${DEFAULT_PLATFORM_NAME}.`
    : `You are invited to join a space on ${DEFAULT_PLATFORM_NAME}.`

  const sentences = [
    invitation,
    // The relay sentence is omitted entirely when there is nothing to display.
    ...(relayDisplay ? [`Relay: ${relayDisplay}.`] : []),
    relayDescription || DEFAULT_PLATFORM_DESCRIPTION,
  ]

  return sanitizeText(sentences.join(" "), 220) || DEFAULT_PLATFORM_DESCRIPTION
}
/**
 * Assemble the values injected into the page head for an invite link.
 *
 * @param requestUrl - URL of the current request.
 * @param invite - Parsed invite; its `relayUrl` is used for display.
 * @param relayMetadata - Metadata resolved for the relay (`name`, `description`, `icon`).
 * @returns `{title, description, image, url, site}` for the preview tags.
 */
const buildInviteMeta = (requestUrl, invite, relayMetadata) => {
  const relayDisplay = displayRelayUrl(invite.relayUrl)
  const spaceName = sanitizeText(relayMetadata.name, 80)
  const relayDescription = sanitizeText(relayMetadata.description, 180)

  return {
    title: spaceName
      ? `Invite to ${spaceName} on ${DEFAULT_PLATFORM_NAME}`
      : `Invite to a Space on ${DEFAULT_PLATFORM_NAME}`,
    description: buildInviteDescription({spaceName, relayDisplay, relayDescription}),
    // Fall back to the bundled app icon when the relay does not provide one.
    image: relayMetadata.icon || absoluteUrlFromRequest(requestUrl, DEFAULT_IMAGE_PATH),
    url: requestUrl.toString(),
    site: requestUrl.origin,
  }
}
const ensureHead = $ => {
const existingHead = $("head").first()
if (existingHead.length > 0) {
return existingHead
}
$("html").prepend("<head></head>")
return $("head").first()
}
/**
 * Set the document title, creating the `<title>` element when the head has none.
 *
 * @param $ - Cheerio document, modified in place.
 * @param title - Text to place in the title element.
 */
const upsertTitle = ($, title) => {
  const head = ensureHead($)
  const existing = head.children("title").first()

  if (existing.length !== 0) {
    existing.text(title)
    return
  }

  // No title yet: insert one at the very start of the head.
  const created = $("<title></title>")
  head.prepend(created)
  created.text(title)
}
/**
 * Create or update a `<meta>` tag so that it is keyed by exactly one of
 * `name` / `property` and carries the given content.
 *
 * @param $ - Cheerio document, modified in place.
 * @param key - Meta key, e.g. "og:title".
 * @param content - Value for the `content` attribute.
 * @param attribute - Attribute that should hold the key: "name" or "property".
 */
const upsertMetaTag = ($, key, content, attribute) => {
  const head = ensureHead($)
  const existing = findMeta($, key)

  const createTag = () => {
    const fresh = $("<meta>")
    head.append(fresh)
    return fresh
  }

  const tag = existing.length === 0 ? createTag() : existing

  // Strip the other keying attribute so the tag is not identified twice.
  const oppositeAttribute = attribute === "name" ? "property" : "name"
  tag.removeAttr(oppositeAttribute)
  tag.attr(attribute, key)
  tag.attr("content", content)
}
/**
 * Create or update the canonical `<link>` of the document.
 *
 * @param $ - Cheerio document, modified in place.
 * @param href - Canonical URL to set.
 */
const upsertCanonical = ($, href) => {
  const head = ensureHead($)
  const existing = findLinkRel($, "canonical")

  const createLink = () => {
    const fresh = $("<link>")
    head.append(fresh)
    return fresh
  }

  const link = existing.length === 0 ? createLink() : existing

  link.attr("rel", "canonical")
  link.attr("href", href)
}
/**
 * Render the HTML template with invite-specific title, canonical link and
 * Open Graph / Twitter meta tags.
 *
 * @param html - HTML template source.
 * @param metadata - `{title, description, image, url, site}` to inject.
 * @returns The serialized HTML document.
 */
const injectInviteMeta = (html, metadata) => {
  const $ = load(html)

  upsertTitle($, metadata.title)
  upsertCanonical($, metadata.url)

  // [key, content, keying attribute], applied in this order so that any newly
  // created tags are appended in the same sequence.
  const metaTags = [
    ["description", metadata.description, "name"],
    ["og:type", "website", "property"],
    ["og:url", metadata.url, "property"],
    ["og:title", metadata.title, "property"],
    ["og:description", metadata.description, "property"],
    ["og:image", metadata.image, "property"],
    ["twitter:card", "summary_large_image", "name"],
    ["twitter:site", metadata.site, "name"],
    ["twitter:url", metadata.url, "name"],
    ["twitter:title", metadata.title, "name"],
    ["twitter:description", metadata.description, "name"],
    ["twitter:image", metadata.image, "name"],
  ]

  for (const [key, content, attribute] of metaTags) {
    upsertMetaTag($, key, content, attribute)
  }

  return $.html()
}
/**
 * Produce the HTML served for a request: the plain template, or the template
 * with invite preview tags when the request is a valid invite link.
 *
 * @param requestUrl - URL of the current request.
 * @returns A promise for the HTML string.
 */
const renderIndex = async requestUrl => {
  const invite = isJoinInvitePath(requestUrl.pathname) ? parseInvite(requestUrl) : undefined

  // Not the invite route, or the relay parameter was unusable.
  if (!invite) {
    return INDEX_TEMPLATE
  }

  const relayMetadata = await fetchRelayMetadata(invite.relayUrl)

  return injectInviteMeta(INDEX_TEMPLATE, buildInviteMeta(requestUrl, invite, relayMetadata))
}
/**
 * Tell whether a file path lies under an `/_app/immutable/` directory.
 *
 * @param filePath - File path using the platform's separator.
 * @returns `true` when the path, with separators converted to "/", contains "/_app/immutable/".
 */
const isImmutableAsset = filePath => {
  const forwardSlashPath = filePath.split(path.sep).join("/")

  return forwardSlashPath.includes("/_app/immutable/")
}
/**
 * Choose the Cache-Control header for a static file.
 *
 * @param filePath - Path of the file being served.
 * @returns "no-cache" for index.html, a one-year immutable policy for
 *   immutable assets, and a one-hour public policy for everything else.
 */
const getStaticCacheControl = filePath => {
  if (path.basename(filePath) === "index.html") {
    return "no-cache"
  }

  if (isImmutableAsset(filePath)) {
    return "public, max-age=31536000, immutable"
  }

  return "public, max-age=3600"
}
/**
 * Build a 200 HTML response with the given caching policy.
 *
 * @param html - HTML body to send.
 * @param isHeadRequest - When true the body is omitted and only headers are sent.
 * @param cacheControl - Value for the Cache-Control header.
 * @returns A `Response` with status 200 and HTML content-type headers.
 */
const respondHtml = (html, isHeadRequest, cacheControl) => {
  const headers = {
    ...HTML_HEADERS,
    "Cache-Control": cacheControl,
  }
  const body = isHeadRequest ? undefined : html

  return new Response(body, {headers, status: 200})
}
const app = new Hono()
// Only allow GET and HEAD requests
app.use("*", async (context, next) => {
if (!ALLOWED_METHODS.has(context.req.method)) {
const method = context.req.method
if (method !== "GET" && method !== "HEAD") {
return context.text("Method Not Allowed", 405, {Allow: "GET, HEAD"})
}
await next()
})
// Serve static assets with appropriate caching
app.use(
"*",
serveStatic({
root: BUILD_DIR,
onFound: (filePath, context) => {
context.header("Cache-Control", getStaticCacheControl(filePath))
const isImmutable = filePath.split(path.sep).join("/").includes("/_app/immutable/")
const cacheControl =
path.basename(filePath) === "index.html"
? "no-cache"
: isImmutable
? "public, max-age=31536000, immutable"
: "public, max-age=3600"
context.header("Cache-Control", cacheControl)
},
}),
)
app.on(["GET", "HEAD"], "*", async context => {
// SPA fallback for routes that don't match static files
app.get("*", async context => {
const requestUrl = requestUrlFromContext(context)
// If the path has an extension, it's likely a missing static asset, not an SPA route
if (path.extname(requestUrl.pathname)) {
return context.text("Not found", 404)
}
const dynamicInvite = isJoinInvitePath(requestUrl.pathname) && requestUrl.searchParams.has("r")
const html = await renderIndex(requestUrl)
const metadata = await resolveMetadata(requestUrl)
const html = metadata ? injectMeta(metadata) : TEMPLATE_HTML
return respondHtml(html, context.req.method === "HEAD", dynamicInvite ? "no-store" : "no-cache")
return context.html(html, 200, {
"Cache-Control": metadata ? "no-store" : "no-cache",
})
})
serve(
+3
View File
@@ -2,6 +2,8 @@
<html lang="en">
<head>
<meta charset="utf-8" />
<title>{NAME}</title>
<link rel="canonical" href="{URL}" />
<meta
name="viewport"
content="width=device-width, initial-scale=1.0, viewport-fit=cover, interactive-widget=resizes-content" />
@@ -11,6 +13,7 @@
<meta property="og:type" content="website" />
<meta property="og:title" content="{NAME}" />
<meta property="og:description" content="{DESCRIPTION}" />
<meta property="og:image" content="" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="{URL}" />
<meta name="twitter:title" content="{NAME}" />