diff --git a/.eslintrc.json b/.eslintrc.json index 0fa1a43..fd73c8d 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -21,6 +21,8 @@ "@typescript-eslint/no-unused-vars": ["error", {"args": "none"}], "@typescript-eslint/no-explicit-any": "off", "@typescript-eslint/ban-ts-comment": "off", + "no-useless-escape": "off", + "prefer-const": ["error", {"destructuring": "all"}], "object-curly-spacing": ["error", "never"], "array-bracket-spacing": ["error", "never"], "semi": ["error", "never"] diff --git a/package-lock.json b/package-lock.json index 14a30f1..ec6b6e2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -532,6 +532,10 @@ "dev": true, "license": "ISC" }, + "node_modules/@welshman/content": { + "resolved": "packages/content", + "link": true + }, "node_modules/@welshman/feeds": { "resolved": "packages/feeds", "link": true @@ -2004,8 +2008,9 @@ } }, "node_modules/nostr-tools": { - "version": "2.3.2", - "license": "Unlicense", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/nostr-tools/-/nostr-tools-2.7.0.tgz", + "integrity": "sha512-jJoL2J1CBiKDxaXZww27nY/Wsuxzx7AULxmGKFce4sskDu1tohNyfnzYQ8BvDyvkstU8kNZUAXPL32tre33uig==", "dependencies": { "@noble/ciphers": "^0.5.1", "@noble/curves": "1.2.0", @@ -3057,6 +3062,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "packages/content": { + "name": "@welshman/content", + "version": "0.0.1", + "license": "MIT", + "dependencies": { + "nostr-tools": "^2.7.0" + }, + "devDependencies": { + "gts": "^5.0.1", + "tsc-multi": "^1.1.0", + "typescript": "~5.1.6" + } + }, "packages/feeds": { "name": "@welshman/feeds", "version": "0.0.9", diff --git a/packages/content/.eslintignore b/packages/content/.eslintignore new file mode 100644 index 0000000..43e824a --- /dev/null +++ b/packages/content/.eslintignore @@ -0,0 +1,2 @@ +build +normalize-url diff --git a/packages/content/README.md b/packages/content/README.md new file mode 100644 index 0000000..24dc111 --- /dev/null +++ b/packages/content/README.md 
@@ -0,0 +1,3 @@
+# @welshman/content [![version](https://badgen.net/npm/v/@welshman/content)](https://npmjs.com/package/@welshman/content)
+
+Utilities for parsing note content.
diff --git a/packages/content/index.ts b/packages/content/index.ts
new file mode 100644
index 0000000..610daf4
--- /dev/null
+++ b/packages/content/index.ts
@@ -0,0 +1,440 @@
+import {nip19} from "nostr-tools"
+
+/** Returns the final element of `xs` (undefined when empty); extra arguments are ignored. */
+const last = <T>(xs: T[], ...args: unknown[]) => xs[xs.length - 1]
+
+/**
+ * Strips an optional `web+nostr:` / `nostr:` scheme, plus up to two slashes, leaving the
+ * bare bech32 entity. Mirrors the prefix that the entity parsers below accept.
+ */
+const fromNostrURI = (s: string) => s.replace(/^(web\+)?(nostr:)?\/?\/?/i, "")
+
+// Copy some types from nostr-tools because I can't import them
+
+type AddressPointer = {
+  identifier: string
+  pubkey: string
+  kind: number
+  relays?: string[]
+}
+
+type EventPointer = {
+  id: string
+  relays?: string[]
+  author?: string
+  kind?: number
+}
+
+type ProfilePointer = {
+  pubkey: string
+  relays?: string[]
+}
+
+// Types
+
+/** State handed to every parser: the note being parsed and the segments emitted so far. */
+export type ParseContext = {
+  results: Parsed[]
+  content: string
+  tags: string[][]
+}
+
+export enum ParsedType {
+  Address = "address",
+  Cashu = "cashu",
+  CodeBlock = "code_block",
+  CodeInline = "code_inline",
+  Ellipsis = "ellipsis",
+  Event = "event",
+  Invoice = "invoice",
+  Link = "link",
+  Newline = "newline",
+  Profile = "profile",
+  Text = "text",
+  Topic = "topic",
+}
+
+export type ParsedCashu = {
+  type: ParsedType.Cashu
+  value: string
+  raw: string
+}
+
+export type ParsedCodeBlock = {
+  type: ParsedType.CodeBlock
+  value: string
+  raw: string
+}
+
+export type ParsedCodeInline = {
+  type: ParsedType.CodeInline
+  value: string
+  raw: string
+}
+
+export type ParsedEllipsis = {
+  type: ParsedType.Ellipsis
+  value: string
+  raw: string
+}
+
+export type ParsedInvoice = {
+  type: ParsedType.Invoice
+  value: string
+  raw: string
+}
+
+export type ParsedLinkValue = {
+  url: string
+  hash: string
+  meta: Record<string, string>
+  isMedia: boolean
+}
+
+export type ParsedLink = {
+  type: ParsedType.Link
+  value: ParsedLinkValue
+  raw: string
+}
+
+export type ParsedNewline = {
+  type: ParsedType.Newline
+  value: string
+  raw: string
+}
+
+export type ParsedText = {
+  type: ParsedType.Text
+  value: string
+  raw: string
+}
+
+export type ParsedTopic = {
+  type: ParsedType.Topic
+  value: string
+  raw: string
+}
+
+export type ParsedEvent = {
+  type: ParsedType.Event
+  value: EventPointer
+  raw: string
+}
+
+export type ParsedProfile = {
+  type: ParsedType.Profile
+  value: ProfilePointer
+  raw: string
+}
+
+export type ParsedAddress = {
+  type: ParsedType.Address
+  value: AddressPointer
+  raw: string
+}
+
+export type Parsed =
+  ParsedAddress |
+  ParsedCashu |
+  ParsedCodeBlock |
+  ParsedCodeInline |
+  ParsedEllipsis |
+  ParsedEvent |
+  ParsedInvoice |
+  ParsedLink |
+  ParsedNewline |
+  ParsedProfile |
+  ParsedText |
+  ParsedTopic
+
+// Parsers for known formats
+//
+// Each parser looks only at the start of `raw` (the content not yet consumed). On a match
+// it returns a segment whose `raw` is exactly the matched text: `parse` advances by
+// `raw.length`, so returning the whole input would swallow the rest of the note.
+
+export const parseAddress = (raw: string, context: ParseContext): ParsedAddress | void => {
+  const [naddr] = raw.match(/^(web\+)?(nostr:)?\/?\/?naddr1[\d\w]+/i) || []
+
+  if (naddr) {
+    try {
+      const {data} = nip19.decode(fromNostrURI(naddr))
+
+      return {type: ParsedType.Address, value: data as AddressPointer, raw: naddr}
+    } catch (e) {
+      // Not a decodable naddr, let the other parsers have a go
+    }
+  }
+}
+
+export const parseCashu = (raw: string, context: ParseContext): ParsedCashu | void => {
+  const [value] = raw.match(/^(cashu)[\d\w=]{50,5000}/i) || []
+
+  if (value) {
+    return {type: ParsedType.Cashu, value, raw: value}
+  }
+}
+
+export const parseCodeBlock = (raw: string, context: ParseContext): ParsedCodeBlock | void => {
+  const [code, value] = raw.match(/^```([^]*?)```/i) || []
+
+  if (code) {
+    return {type: ParsedType.CodeBlock, value, raw: code}
+  }
+}
+
+export const parseCodeInline = (raw: string, context: ParseContext): ParsedCodeInline | void => {
+  const [code, value] = raw.match(/^`(.*?)`/i) || []
+
+  if (code) {
+    return {type: ParsedType.CodeInline, value, raw: code}
+  }
+}
+
+export const parseEvent = (raw: string, context: ParseContext): ParsedEvent | void => {
+  const [entity] = raw.match(/^(web\+)?(nostr:)?\/?\/?n(event|ote)1[\d\w]+/i) || []
+
+  if (entity) {
+    try {
+      const {type, data} = nip19.decode(fromNostrURI(entity))
+      const value = type === "note"
+        ? {id: data as string, relays: []}
+        : data as EventPointer
+
+      return {type: ParsedType.Event, value, raw: entity}
+    } catch (e) {
+      // Not a decodable note/nevent, let the other parsers have a go
+    }
+  }
+}
+
+export const parseInvoice = (raw: string, context: ParseContext): ParsedInvoice | void => {
+  // The "ln" prefix is shared, so only the tail of "lnbc" / "lnurl" goes in the group
+  const [value] = raw.match(/^ln(bc|url)[\d\w]{50,1000}/i) || []
+
+  if (value) {
+    return {type: ParsedType.Invoice, value, raw: value}
+  }
+}
+
+export const parseLink = (raw: string, context: ParseContext): ParsedLink | void => {
+  const [link] = raw.match(/^([a-z\+:]{2,30}:\/\/)?[^<>\(\)\s]+\.[a-z]{2,6}[^\s]*[^<>"'\.!?,:\s\)\(]/i) || []
+
+  if (!link) {
+    return
+  }
+
+  const prev = last(context.results)
+
+  // Skip url if it's just the end of a filepath
+  if (prev?.type === ParsedType.Text && prev.value.endsWith("/")) {
+    return
+  }
+
+  // Strip hash component
+  const [path, hash = ""] = link.split("#")
+
+  // Skip ellipses and very short non-urls
+  if (path.match(/\.\./)) {
+    return
+  }
+
+  // Make sure there's a protocol. This must be a regex literal: inside a string "\w" is just "w"
+  const url = path.match(/^[a-z\+:]{2,30}:\/\//i) ? path : "https://" + path
+
+  const meta: Record<string, string> = Object.fromEntries(new URLSearchParams(hash).entries())
+
+  // Merge in metadata from any imeta tag that refers to this link
+  for (const tag of context.tags) {
+    if (tag[0] === "imeta" && tag.some(t => t.includes(`url ${link}`))) {
+      Object.assign(meta, Object.fromEntries(tag.slice(1).map((m: string) => m.split(" "))))
+    }
+  }
+
+  const isMedia = Boolean(
+    url.match(/\.(jpe?g|png|wav|mp3|mp4|mov|avi|webm|webp|gif|bmp|svg)$/) &&
+    last(url.replace(/\/$/, "").split("://"))?.includes("/")
+  )
+
+  return {type: ParsedType.Link, value: {url, hash, meta, isMedia}, raw: link}
+}
+
+export const parseNewline = (raw: string, context: ParseContext): ParsedNewline | void => {
+  const [value] = raw.match(/^\n+/) || []
+
+  if (value) {
+    return {type: ParsedType.Newline, raw: value, value}
+  }
+}
+
+export const parseProfile = (raw: string, context: ParseContext): ParsedProfile | void => {
+  const [entity] = raw.match(/^(web\+)?(nostr:)?\/?\/?n(profile|pub)1[\d\w]+/i) || []
+
+  if (entity) {
+    try {
+      const {type, data} = nip19.decode(fromNostrURI(entity))
+      const value = type === "npub"
+        ? {pubkey: data as string, relays: []}
+        : data as ProfilePointer
+
+      return {type: ParsedType.Profile, value, raw: entity}
+    } catch (e) {
+      // Not a decodable npub/nprofile, let the other parsers have a go
+    }
+  }
+}
+
+export const parseTopic = (raw: string, context: ParseContext): ParsedTopic | void => {
+  const [value] = raw.match(/^#[^\s!\"#$%&'()*+,-.\/:;<=>?@[\\\]^_`{|}~]+/i) || []
+
+  // Skip numeric topics
+  if (value && !value.match(/^#\d+$/)) {
+    return {type: ParsedType.Topic, raw: value, value}
+  }
+}
+
+
+// Parse other formats to known types
+
+export const parseLegacyMention = (raw: string, context: ParseContext): ParsedProfile | ParsedEvent | void => {
+  const mentionMatch = raw.match(/^#\[(\d+)\]/i)
+
+  if (mentionMatch) {
+    const [mention, index] = mentionMatch
+    const [tag, value, url] = context.tags[parseInt(index)] || []
+    const relays = url ? [url] : []
+
+    if (tag === "p") {
+      return {type: ParsedType.Profile, value: {pubkey: value, relays}, raw: mention}
+    }
+
+    if (tag === "e") {
+      return {type: ParsedType.Event, value: {id: value, relays}, raw: mention}
+    }
+  }
+}
+
+export const parsers = [
+  parseNewline,
+  parseLegacyMention,
+  parseTopic,
+  parseCodeBlock,
+  parseCodeInline,
+  parseAddress,
+  parseProfile,
+  parseEvent,
+  parseCashu,
+  parseInvoice,
+  parseLink
+]
+
+/** Runs the parsers in order against the start of `raw` and returns the first match, if any. */
+export const parseNext = (raw: string, context: ParseContext): Parsed | void => {
+  for (const parser of parsers) {
+    const result = parser(raw, context)
+
+    if (result) {
+      return result
+    }
+  }
+}
+
+// Main exports
+
+/**
+ * Splits note content into typed segments. When the trimmed content is empty, the value of
+ * the first "alt" tag is parsed instead. Anything no parser recognizes is collected into
+ * text segments.
+ */
+export const parse = ({content = "", tags = []}: {content?: string; tags?: string[][]}) => {
+  const context: ParseContext = {content, tags, results: []}
+
+  let buffer = ""
+  let remaining = content.trim() || tags.find(t => t[0] === "alt")?.[1] || ""
+
+  while (remaining) {
+    const parsed = parseNext(remaining, context)
+
+    if (parsed) {
+      if (buffer) {
+        context.results.push({type: ParsedType.Text, value: buffer, raw: buffer})
+        buffer = ""
+      }
+
+      context.results.push(parsed)
+      remaining = remaining.slice(parsed.raw.length)
+    } else {
+      // Instead of going character by character and re-running all the above regular expressions
+      // a million times, try to match the next word and add it to the buffer
+      const [match] = remaining.match(/^[\w\d]+ ?/i) || [remaining[0]]
+
+      buffer += match
+      remaining = remaining.slice(match.length)
+    }
+  }
+
+  if (buffer) {
+    context.results.push({type: ParsedType.Text, value: buffer, raw: buffer})
+  }
+
+  return context.results
+}
+
+type TruncateOpts = {
+  minLength: number
+  maxLength: number
+  mediaLength: number
+  entityLength: number
+}
+
+/**
+ * Shortens parsed content whose estimated rendered size reaches `maxLength`, cutting back
+ * to roughly `minLength` but always keeping at least the first segment. Every option
+ * falls back to its default when omitted.
+ */
+export const truncate = (
+  content: Parsed[],
+  {
+    minLength = 400,
+    maxLength = 600,
+    mediaLength = 200,
+    entityLength = 30,
+  }: Partial<TruncateOpts> = {},
+) => {
+  // Get a list of content sizes so we know where to truncate
+  // Non-plaintext things might take up more or less room if rendered
+  const sizes = content.map((parsed: Parsed) => {
+    switch (parsed.type) {
+      case ParsedType.Link:
+      case ParsedType.Cashu:
+      case ParsedType.Invoice:
+        return mediaLength
+      case ParsedType.Event:
+      case ParsedType.Address:
+      case ParsedType.Profile:
+        return entityLength
+      default:
+        return parsed.value.length
+    }
+  })
+
+  // If total size fits inside our max, we're done
+  if (sizes.reduce((r, x) => r + x, 0) < maxLength) {
+    return content
+  }
+
+  let currentSize = 0
+
+  // Otherwise, truncate more than necessary so that when the user expands the note
+  // they have more than just a tiny bit to look at. Truncating a single word is annoying.
+  for (let i = 0; i < sizes.length; i++) {
+    currentSize += sizes[i]
+
+    // Don't truncate down to nothing
+    if (currentSize > minLength && i > 0) {
+      return content.slice(0, i)
+    }
+  }
+
+  return content
+}
diff --git a/packages/content/package.json b/packages/content/package.json
new file mode 100644
index 0000000..f83c621
--- /dev/null
+++ b/packages/content/package.json
@@ -0,0 +1,36 @@
+{
+  "name": "@welshman/content",
+  "version": "0.0.1",
+  "author": "hodlbod",
+  "license": "MIT",
+  "description": "A collection of utilities for parsing nostr note content.",
+  "publishConfig": {
+    "access": "public"
+  },
+  "type": "module",
+  "files": [
+    "build"
+  ],
+  "types": "./build/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./build/index.d.ts",
+      "import": "./build/index.mjs",
+      "require": "./build/index.cjs"
+    }
+  },
+  "scripts": {
+    "pub": "npm run lint && npm run build && npm publish",
+    "build": "gts clean && tsc-multi",
+    "lint": "gts lint",
+    "fix": "gts fix"
+  },
+  "devDependencies": {
+    "gts": "^5.0.1",
+    "tsc-multi": "^1.1.0",
+    "typescript": "~5.1.6"
+  },
+  "dependencies": {
+    "nostr-tools": "^2.7.0"
+  }
+}
diff --git a/packages/content/tsc-multi.json b/packages/content/tsc-multi.json
new file mode 100644
index 0000000..6c37019
--- /dev/null
+++ b/packages/content/tsc-multi.json
@@ -0,0 +1,7 @@
+{
+  "targets": [
+    {"extname": ".cjs", "module": "commonjs"},
+    {"extname": ".mjs", "module": "esnext", "moduleResolution": "node"}
+  ],
+  "projects": ["tsconfig.json"]
+}
diff --git a/packages/content/tsconfig.json b/packages/content/tsconfig.json
new file mode 100644
index 0000000..15d351a
--- /dev/null
+++ b/packages/content/tsconfig.json
@@ -0,0 +1,11 @@
+{
+  "extends": "../../node_modules/gts/tsconfig-google.json",
+  "compilerOptions": {
+    "rootDir": ".",
+    "outDir": "build",
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "lib": ["esnext", "dom", "dom.iterable"]
+  },
+  "include": ["**/*.ts"]
+}