Add content package

2024-06-03 15:05:37 -07:00
parent d4cecd5e50
commit d2a3f14567
8 changed files with 506 additions and 2 deletions
@@ -21,6 +21,8 @@
    "@typescript-eslint/no-unused-vars": ["error", {"args": "none"}],
    "@typescript-eslint/no-explicit-any": "off",
    "@typescript-eslint/ban-ts-comment": "off",
+    "no-useless-escape": "off",
+    "prefer-const": ["error", {"destructuring": "all"}],
    "object-curly-spacing": ["error", "never"],
    "array-bracket-spacing": ["error", "never"],
    "semi": ["error", "never"]
@@ -532,6 +532,10 @@
      "dev": true,
      "license": "ISC"
    },
+    "node_modules/@welshman/content": {
+      "resolved": "packages/content",
+      "link": true
+    },
    "node_modules/@welshman/feeds": {
      "resolved": "packages/feeds",
      "link": true
@@ -2004,8 +2008,9 @@
      }
    },
    "node_modules/nostr-tools": {
-      "version": "2.3.2",
-      "license": "Unlicense",
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/nostr-tools/-/nostr-tools-2.7.0.tgz",
+      "integrity": "sha512-jJoL2J1CBiKDxaXZww27nY/Wsuxzx7AULxmGKFce4sskDu1tohNyfnzYQ8BvDyvkstU8kNZUAXPL32tre33uig==",
      "dependencies": {
        "@noble/ciphers": "^0.5.1",
        "@noble/curves": "1.2.0",
@@ -3057,6 +3062,19 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
+    "packages/content": {
+      "name": "@welshman/content",
+      "version": "0.0.1",
+      "license": "MIT",
+      "dependencies": {
+        "nostr-tools": "^2.7.0"
+      },
+      "devDependencies": {
+        "gts": "^5.0.1",
+        "tsc-multi": "^1.1.0",
+        "typescript": "~5.1.6"
+      }
+    },
    "packages/feeds": {
      "name": "@welshman/feeds",
      "version": "0.0.9",
@@ -0,0 +1,2 @@
+build
+normalize-url
@@ -0,0 +1,3 @@
+# @welshman/content [![version](https://badgen.net/npm/v/@welshman/content)](https://npmjs.com/package/@welshman/content)
+
+Utilities for parsing note content.
@@ -0,0 +1,425 @@
+import {nip19} from "nostr-tools"
+
+const last = <T>(xs: T[], ...args: unknown[]) => xs[xs.length - 1]
+
+const fromNostrURI = (s: string) => s.replace(/^nostr:\/?\/?/, "")
+
+// Copy some types from nostr-tools because I can't import them
+
+type AddressPointer = {
+  identifier: string
+  pubkey: string
+  kind: number
+  relays?: string[]
+}
+
+type EventPointer = {
+  id: string
+  relays?: string[]
+  author?: string
+  kind?: number
+}
+
+type ProfilePointer = {
+  pubkey: string
+  relays?: string[]
+}
+
+// Types
+
+export type ParseContext = {
+  results: Parsed[]
+  content: string,
+  tags: string[][]
+}
+
+export enum ParsedType {
+  Address = "address",
+  Cashu = "cashu",
+  CodeBlock = "code_block",
+  CodeInline = "code_inline",
+  Ellipsis = "ellipsis",
+  Event = "event",
+  Invoice = "invoice",
+  Link = "link",
+  Newline = "newline",
+  Profile = "profile",
+  Text = "text",
+  Topic = "topic",
+}
+
+export type ParsedCashu = {
+  type: ParsedType.Cashu
+  value: string
+  raw: string
+}
+
+export type ParsedCodeBlock = {
+  type: ParsedType.CodeBlock
+  value: string
+  raw: string
+}
+
+export type ParsedCodeInline = {
+  type: ParsedType.CodeInline
+  value: string
+  raw: string
+}
+
+export type ParsedEllipsis = {
+  type: ParsedType.Ellipsis
+  value: string
+  raw: string
+}
+
+export type ParsedInvoice = {
+  type: ParsedType.Invoice
+  value: string
+  raw: string
+}
+
+export type ParsedLinkValue = {
+  url: string
+  hash: string
+  meta: Record<string, string>
+  isMedia: boolean
+}
+
+export type ParsedLink = {
+  type: ParsedType.Link
+  value: ParsedLinkValue
+  raw: string
+}
+
+export type ParsedNewline = {
+  type: ParsedType.Newline
+  value: string
+  raw: string
+}
+
+export type ParsedText = {
+  type: ParsedType.Text
+  value: string
+  raw: string
+}
+
+export type ParsedTopic = {
+  type: ParsedType.Topic
+  value: string
+  raw: string
+}
+
+export type ParsedEvent = {
+  type: ParsedType.Event
+  value: EventPointer
+  raw: string
+}
+
+export type ParsedProfile = {
+  type: ParsedType.Profile
+  value: ProfilePointer
+  raw: string
+}
+
+export type ParsedAddress = {
+  type: ParsedType.Address
+  value: AddressPointer
+  raw: string
+}
+
+export type Parsed =
+  ParsedAddress |
+  ParsedCashu |
+  ParsedCodeBlock |
+  ParsedCodeInline |
+  ParsedEllipsis |
+  ParsedEvent |
+  ParsedInvoice |
+  ParsedLink |
+  ParsedNewline |
+  ParsedProfile |
+  ParsedText |
+  ParsedTopic
+
+// Parsers for known formats
+
+export const parseAddress = (raw: string, context: ParseContext): ParsedAddress | void => {
+  const [naddr] = raw.match(/^(web\+)?(nostr:)?\/?\/?naddr1[\d\w]+/i) || []
+
+  if (naddr) {
+    try {
+      const {data} = nip19.decode(fromNostrURI(naddr))
+
+      return {type: ParsedType.Address, value: data as AddressPointer, raw}
+    } catch (e) {
+      // Pass
+    }
+  }
+}
+
+export const parseCashu = (raw: string, context: ParseContext): ParsedCashu | void => {
+  const [value] = raw.match(/^(cashu)[\d\w=]{50,5000}/i) || []
+
+  if (value) {
+    return {type: ParsedType.Cashu, value, raw}
+  }
+}
+
+export const parseCodeBlock = (raw: string, context: ParseContext): ParsedCodeBlock | void => {
+  const [code, value] = raw.match(/^```([^]*?)```/i) || []
+
+  if (code) {
+    return {type: ParsedType.CodeBlock, value, raw}
+  }
+}
+
+export const parseCodeInline = (raw: string, context: ParseContext): ParsedCodeInline | void => {
+  const [code, value] = raw.match(/^`(.*?)`/i) || []
+
+  if (code) {
+    return {type: ParsedType.CodeInline, value, raw}
+  }
+}
+
+export const parseEvent = (raw: string, context: ParseContext): ParsedEvent | void => {
+  const [entity] = raw.match(/^(web\+)?(nostr:)?\/?\/?n(event|ote)1[\d\w]+/i) || []
+
+  if (entity) {
+    try {
+      const {type, data} = nip19.decode(fromNostrURI(entity))
+      const value = type === "note"
+        ? {id: data as string, relays: []}
+        : data as EventPointer
+
+      return {type: ParsedType.Event, value, raw}
+    } catch (e) {
+      // Pass
+    }
+  }
+}
+
+export const parseInvoice = (raw: string, context: ParseContext): ParsedInvoice | void => {
+  const [value] = raw.match(/^ln(lnbc|lnurl)[\d\w]{50,1000}/i) || []
+
+  if (value) {
+    return {type: ParsedType.Invoice, value, raw}
+  }
+}
+
+export const parseLink = (raw: string, context: ParseContext): ParsedLink | void => {
+  const [link] = raw.match(/^([a-z\+:]{2,30}:\/\/)?[^<>\(\)\s]+\.[a-z]{2,6}[^\s]*[^<>"'\.!?,:\s\)\(]/gi) || []
+
+  if (!link) {
+    return
+  }
+
+  const prev = last(context.results)
+
+  // Skip url if it's just the end of a filepath
+  if (prev?.type === ParsedType.Text && prev.value.endsWith("/")) {
+    return
+  }
+
+  // Strip hash component
+  let [url, hash] = link.split("#")
+
+  // Skip ellipses and very short non-urls
+  if (url.match(/\.\./)) {
+    return
+  }
+
+  // Make sure there's a protocol
+  if (!url.match("^\w+://")) {
+    url = "https://" + url
+  }
+
+  const meta = Object.fromEntries(new URLSearchParams(hash).entries())
+
+  for (const tag of context.tags) {
+    if (tag[0] === 'imeta' && tag.find(t => t.includes(`url ${raw}`))) {
+      Object.assign(meta, Object.fromEntries(tag.slice(1).map((m: string) => m.split(" "))))
+    }
+  }
+
+  const isMedia = Boolean(
+    url.match(/\.(jpe?g|png|wav|mp3|mp4|mov|avi|webm|webp|gif|bmp|svg)$/) &&
+    last(url.replace(/\/$/, "").split("://"))?.includes("/")
+  )
+
+  const value = {url, hash, meta, isMedia}
+
+  return {type: ParsedType.Link, value, raw}
+}
+
+export const parseNewline = (raw: string, context: ParseContext): ParsedNewline | void => {
+  const [value] = raw.match(/^\n+/) || []
+
+  if (value) {
+    return {type: ParsedType.Newline, raw, value}
+  }
+}
+
+export const parseProfile = (raw: string, context: ParseContext): ParsedProfile | void => {
+  const [entity] = raw.match(/^(web\+)?(nostr:)?\/?\/?n(profile|pub)1[\d\w]+/i) || []
+
+  if (entity) {
+    try {
+      const {type, data} = nip19.decode(fromNostrURI(entity))
+      const value = type === "npub"
+        ? {pubkey: data as string, relays: []}
+        : data as ProfilePointer
+
+      return {type: ParsedType.Profile, value, raw}
+    } catch (e) {
+      // Pass
+    }
+  }
+}
+
+export const parseTopic = (raw: string, context: ParseContext): ParsedTopic | void => {
+  const [value] = raw.match(/^#[^\s!\"#$%&'()*+,-.\/:;<=>?@[\\\]^_`{|}~]+/i) || []
+
+  // Skip numeric topics
+  if (value && !value.match(/^#\d+$/)) {
+    return {type: ParsedType.Topic, raw, value}
+  }
+}
+
+
+// Parse other formats to known types
+
+export const parseLegacyMention = (raw: string, context: ParseContext): ParsedProfile | ParsedEvent | void => {
+  const mentionMatch = raw.match(/^#\[(\d+)\]/i) || []
+
+  if (mentionMatch) {
+    const [tag, value, url] = context.tags[parseInt(mentionMatch[1])] || []
+    const relays = url ? [url] : []
+
+    if (tag === "p") {
+      return {type: ParsedType.Profile, value: {pubkey: value, relays}, raw}
+    }
+
+    if (tag === "e") {
+      return {type: ParsedType.Event, value: {id: value, relays}, raw}
+    }
+  }
+}
+
+export const parsers = [
+  parseNewline,
+  parseLegacyMention,
+  parseTopic,
+  parseCodeBlock,
+  parseCodeInline,
+  parseAddress,
+  parseProfile,
+  parseEvent,
+  parseCashu,
+  parseInvoice,
+  parseLink
+]
+
+export const parseNext = (raw: string, context: ParseContext): Parsed | void => {
+  for (const parser of parsers) {
+    const result = parser(raw, context)
+
+    if (result) {
+      return result
+    }
+  }
+}
+
+// Main exports
+
+export const parse = ({content = "", tags = []}: {content?: string; tags?: string[][]}) => {
+  const context: ParseContext = {content, tags, results: []}
+
+  let buffer = ""
+  let remaining = content.trim() || tags.find(t => t[0] === "alt")?.[1] || ""
+
+  while (remaining) {
+    const parsed = parseNext(remaining, context)
+
+    if (parsed) {
+      if (buffer) {
+        context.results.push({type: ParsedType.Text, value: buffer, raw: buffer})
+        buffer = ""
+      }
+
+      context.results.push(parsed)
+      remaining = remaining.slice(parsed.raw.length)
+    } else {
+      // Instead of going character by character and re-running all the above regular expressions
+      // a million times, try to match the next word and add it to the buffer
+      const [match] = remaining.match(/^[\w\d]+ ?/i) || remaining[0]
+
+      buffer += match
+      remaining = remaining.slice(match.length)
+    }
+  }
+
+  if (buffer) {
+    context.results.push({type: ParsedType.Text, value: buffer, raw: buffer})
+  }
+
+  return context.results
+}
+
+type TruncateOpts = {
+  minLength: number
+  maxLength: number
+  mediaLength: number
+  entityLength: number
+}
+
+export const truncate = (
+  content: Parsed[],
+  {
+    minLength = 400,
+    maxLength = 600,
+    mediaLength = 200,
+    entityLength = 30,
+  }: TruncateOpts,
+) => {
+  // Get a list of content sizes so we know where to truncate
+  // Non-plaintext things might take up more or less room if rendered
+  const sizes = content.map((parsed: Parsed) => {
+    switch (parsed.type) {
+      case ParsedType.Link:
+      case ParsedType.Cashu:
+      case ParsedType.Invoice:
+        return mediaLength
+      case ParsedType.Event:
+      case ParsedType.Address:
+      case ParsedType.Profile:
+        return entityLength
+      default:
+        return parsed.value.length
+    }
+  })
+
+  // If total size fits inside our max, we're done
+  if (sizes.reduce((r, x) => r + x, 0) < maxLength) {
+    return content
+  }
+
+  let currentSize = 0
+
+  // Otherwise, truncate more then necessary so that when the user expands the note
+  // they have more than just a tiny bit to look at. Truncating a single word is annoying.
+  sizes.every((size, i) => {
+    currentSize += size
+
+    // Don't truncate down to nothing
+    if (currentSize > minLength && i > 0) {
+      content = content.slice(0, i)
+
+      return false
+    }
+
+
+    return true
+  })
+
+  return content
+}
@@ -0,0 +1,36 @@
+{
+  "name": "@welshman/content",
+  "version": "0.0.1",
+  "author": "hodlbod",
+  "license": "MIT",
+  "description": "A collection of utilities for parsing nostr note content.",
+  "publishConfig": {
+    "access": "public"
+  },
+  "type": "module",
+  "files": [
+    "build"
+  ],
+  "types": "./build/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./build/index.d.ts",
+      "import": "./build/index.mjs",
+      "require": "./build/index.cjs"
+    }
+  },
+  "scripts": {
+    "pub": "npm run lint && npm run build && npm publish",
+    "build": "gts clean && tsc-multi",
+    "lint": "gts lint",
+    "fix": "gts fix"
+  },
+  "devDependencies": {
+    "gts": "^5.0.1",
+    "tsc-multi": "^1.1.0",
+    "typescript": "~5.1.6"
+  },
+  "dependencies": {
+    "nostr-tools": "^2.7.0"
+  }
+}
@@ -0,0 +1,7 @@
+{
+  "targets": [
+    {"extname": ".cjs", "module": "commonjs"},
+    {"extname": ".mjs", "module": "esnext", "moduleResolution": "node"}
+  ],
+  "projects": ["tsconfig.json"]
+}
@@ -0,0 +1,11 @@
+{
+  "extends": "../../node_modules/gts/tsconfig-google.json",
+  "compilerOptions": {
+    "rootDir": ".",
+    "outDir": "build",
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "lib": ["esnext", "dom", "dom.iterable"]
+  },
+  "include": ["**/*.ts"]
+}