This commit is contained in:
Jon Staab
2025-04-09 11:35:09 -07:00
parent 5f3624b8f3
commit 728ad1fba0
37 changed files with 1039 additions and 2183 deletions
+261 -253
View File
@@ -1,12 +1,13 @@
// Copied from https://github.com/sindresorhus/normalize-url
/* eslint-disable */
/**
Options accepted by `normalizeUrl`. Every option is optional; omitted options fall back to
the defaults applied inside `normalizeUrl`.
*/
export type Options = {
  /**
  Protocol prepended to a URL that has no protocol or is protocol-relative.

  @default 'http'
  */
  readonly defaultProtocol?: "https" | "http"

  /**
  Prepends `defaultProtocol` to the URL if it's protocol-relative.

  @default true
  */
  readonly normalizeProtocol?: boolean

  /**
  Normalizes HTTPS URLs to HTTP.

  @default false
  */
  readonly forceHttp?: boolean

  /**
  Normalizes HTTP URLs to HTTPS.

  This option cannot be used with the `forceHttp` option at the same time.

  @default false
  */
  readonly forceHttps?: boolean

  /**
  Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of a URL.

  @default true
  */
  readonly stripAuthentication?: boolean

  /**
  Removes hash from the URL.

  @default false
  */
  readonly stripHash?: boolean

  /**
  Remove the protocol from the URL: `http://sindresorhus.com` → `sindresorhus.com`.

  It will only remove `https://` and `http://` protocols.

  @default false
  */
  readonly stripProtocol?: boolean

  /**
  Strip the [text fragment](https://web.dev/text-fragments/) part of the URL.

  __Note:__ The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment.

  @default true
  */
  readonly stripTextFragment?: boolean

  /**
  Removes `www.` from the URL.

  @default true
  */
  readonly stripWWW?: boolean

  /**
  Removes query parameters that match any of the provided strings or regexes.

  `true` removes the whole query string (unless `keepQueryParameters` is an array); `false` removes nothing.

  @default [/^utm_\w+/i]
  */
  readonly removeQueryParameters?: ReadonlyArray<RegExp | string> | boolean

  /**
  Keeps only query parameters that match any of the provided strings or regexes.

  __Note__: It overrides the `removeQueryParameters` option.

  @default undefined
  */
  readonly keepQueryParameters?: ReadonlyArray<RegExp | string>

  /**
  Removes trailing slash.

  __Note__: Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`.

  @default true
  */
  readonly removeTrailingSlash?: boolean

  /**
  Remove a sole `/` pathname in the output. This option is independent of `removeTrailingSlash`.

  @default true
  */
  readonly removeSingleSlash?: boolean

  /**
  Removes the default directory index file from path that matches any of the provided strings or regexes.

  When `true`, the regex `/^index\.[a-z]+$/` is used.

  @default false
  */
  readonly removeDirectoryIndex?: boolean | ReadonlyArray<RegExp | string>

  /**
  Removes an explicit port number from the URL.

  Port 443 is always removed from HTTPS URLs and 80 is always removed from HTTP URLs regardless of this option.

  @default false
  */
  readonly removeExplicitPort?: boolean

  /**
  Sorts the query parameters alphabetically by key.

  @default true
  */
  readonly sortQueryParameters?: boolean
}
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
// Values `normalizeDataURL` treats as defaults and omits from its normalized output.
const DATA_URL_DEFAULT_MIME_TYPE = "text/plain"
const DATA_URL_DEFAULT_CHARSET = "us-ascii"
/**
Reports whether `name` matches at least one filter.

A string filter matches on strict equality; a `RegExp` filter matches when it tests
positive against `name`. An empty filter list never matches.
*/
const testParameter = (name: string, filters: ReadonlyArray<RegExp | string>): boolean =>
  filters.some(filter => {
    if (filter instanceof RegExp) {
      // Global and sticky regexes remember `lastIndex` between `test()` calls, which
      // would make repeated checks with the same filter produce false negatives.
      filter.lastIndex = 0
      return filter.test(name)
    }

    return filter === name
  })
// Protocols (in `URL.protocol` form, with the trailing colon) that `hasCustomProtocol`
// does not consider custom.
const supportedProtocols = new Set(["https:", "http:", "file:"])
/**
Reports whether `urlString` parses as an absolute URL whose protocol is not listed in
`supportedProtocols`.

A string that `URL` cannot parse is reported as not having a custom protocol.
*/
const hasCustomProtocol = (urlString: string): boolean => {
  try {
    const {protocol} = new URL(urlString)
    return protocol.endsWith(":") && !supportedProtocols.has(protocol)
  } catch {
    // Not parseable as an absolute URL, so there is no protocol to inspect.
    return false
  }
}
/**
Normalizes a `data:` URL.

- The MIME type and the `charset` value are lowercased.
- Attribute keys and values are trimmed; a `charset` equal to `DATA_URL_DEFAULT_CHARSET` is dropped.
- The MIME type is omitted when it is empty or equal to `DATA_URL_DEFAULT_MIME_TYPE` and no
  other media-type component remains.
- A trailing `base64` marker is preserved, and base64 payloads are trimmed.
- The hash is removed when `stripHash` is `true`.

@throws {Error} When `urlString` does not have the `data:<type>,<data>` shape.
*/
const normalizeDataURL = (urlString: string, {stripHash}: {stripHash: boolean}): string => {
  const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/.exec(urlString)

  if (!match) {
    throw new Error(`Invalid URL: ${urlString}`)
  }

  // `type` and `data` always participate in a successful match (possibly as empty
  // strings); `hash` is only present when the URL contains a `#`.
  const groups = match.groups as {type: string; data: string; hash?: string}
  const {type, data} = groups
  const hash = stripHash ? "" : groups.hash
  const mediaType = type.split(";")

  let isBase64 = false
  if (mediaType[mediaType.length - 1] === "base64") {
    mediaType.pop()
    isBase64 = true
  }

  // Lowercase MIME type
  const mimeType = mediaType.shift()?.toLowerCase() ?? ""
  const attributes = mediaType
    .map((attribute: string) => {
      let [key, value = ""] = attribute.split("=").map((s: string) => s.trim())

      // Lowercase `charset`
      if (key === "charset") {
        value = value.toLowerCase()

        // The default charset carries no information, so drop the attribute entirely.
        if (value === DATA_URL_DEFAULT_CHARSET) {
          return ""
        }
      }

      return `${key}${value ? `=${value}` : ""}`
    })
    .filter(Boolean)

  const normalizedMediaType = [...attributes]

  if (isBase64) {
    normalizedMediaType.push("base64")
  }

  // Keep the MIME type when something follows it, or when it is a non-default value.
  if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
    normalizedMediaType.unshift(mimeType)
  }

  return `data:${normalizedMediaType.join(";")},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ""}`
}
/**
Normalizes a URL string.

The input is trimmed first. `data:` URLs are handled by `normalizeDataURL`, and URLs whose
protocol is not one of `supportedProtocols` are returned trimmed but otherwise unchanged.
All other URLs are parsed with `URL` and rewritten according to `opts`.

@param urlString - URL to normalize.
@param opts - Overrides for the default normalization options.
@returns The normalized URL.
@throws {Error} When both `forceHttp` and `forceHttps` are set, or when a `data:` URL is malformed.
@throws {TypeError} When the (protocol-prefixed) input cannot be parsed by `URL`.

@example
```
normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo');
//=> 'http://sindresorhus.com/baz?a=foo&b=bar'
```
*/
export default function normalizeUrl(urlString: string, opts?: Options): string {
  const options = {
    defaultProtocol: "http",
    normalizeProtocol: true,
    forceHttp: false,
    forceHttps: false,
    stripAuthentication: true,
    stripHash: false,
    stripTextFragment: true,
    stripWWW: true,
    removeQueryParameters: [/^utm_\w+/i],
    removeTrailingSlash: true,
    removeSingleSlash: true,
    removeDirectoryIndex: false,
    removeExplicitPort: false,
    sortQueryParameters: true,
    ...opts,
  }

  // Legacy: Append `:` to the protocol if missing.
  if (typeof options.defaultProtocol === "string" && !options.defaultProtocol.endsWith(":")) {
    options.defaultProtocol = `${options.defaultProtocol}:`
  }

  urlString = urlString.trim()

  // Data URL
  if (/^data:/i.test(urlString)) {
    return normalizeDataURL(urlString, options)
  }

  // URLs with a custom protocol are passed through untouched.
  if (hasCustomProtocol(urlString)) {
    return urlString
  }

  const hasRelativeProtocol = urlString.startsWith("//")
  const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString)

  // Prepend protocol
  if (!isRelativeUrl) {
    urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol)
  }

  const urlObject = new URL(urlString)

  if (options.forceHttp && options.forceHttps) {
    throw new Error("The `forceHttp` and `forceHttps` options cannot be used together")
  }

  if (options.forceHttp && urlObject.protocol === "https:") {
    urlObject.protocol = "http:"
  }

  if (options.forceHttps && urlObject.protocol === "http:") {
    urlObject.protocol = "https:"
  }

  // Remove auth
  if (options.stripAuthentication) {
    urlObject.username = ""
    urlObject.password = ""
  }

  // Remove hash
  if (options.stripHash) {
    urlObject.hash = ""
  } else if (options.stripTextFragment) {
    urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, "")
  }

  // Remove duplicate slashes if not preceded by a protocol
  // NOTE: This could be implemented using a single negative lookbehind
  // regex, but we avoid that to maintain compatibility with older js engines
  // which do not have support for that feature.
  if (urlObject.pathname) {
    // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind.

    // Split the string by occurrences of this protocol regex, and perform
    // duplicate-slash replacement on the strings between those occurrences
    // (if any).
    const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g

    let lastIndex = 0
    let result = ""
    for (;;) {
      const match = protocolRegex.exec(urlObject.pathname)
      if (!match) {
        break
      }

      const protocol = match[0]
      const protocolAtIndex = match.index
      const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex)

      result += intermediate.replace(/\/{2,}/g, "/")
      result += protocol
      lastIndex = protocolAtIndex + protocol.length
    }

    const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length)
    result += remnant.replace(/\/{2,}/g, "/")

    urlObject.pathname = result
  }

  // Decode URI octets
  if (urlObject.pathname) {
    try {
      urlObject.pathname = decodeURI(urlObject.pathname)
    } catch {
      // Best effort: `decodeURI` throws on malformed escape sequences, in which case
      // the pathname is kept as-is.
    }
  }

  // Remove directory index
  if (options.removeDirectoryIndex === true) {
    options.removeDirectoryIndex = [/^index\.[a-z]+$/]
  }

  if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
    let pathComponents = urlObject.pathname.split("/")
    const lastComponent = pathComponents[pathComponents.length - 1]

    if (testParameter(lastComponent, options.removeDirectoryIndex)) {
      pathComponents = pathComponents.slice(0, -1)
      // The first component is the empty string before the leading `/`; drop it and
      // leave a trailing slash where the index file used to be.
      urlObject.pathname = pathComponents.slice(1).join("/") + "/"
    }
  }

  if (urlObject.hostname) {
    // Remove trailing dot
    urlObject.hostname = urlObject.hostname.replace(/\.$/, "")

    // Remove `www.`
    if (
      options.stripWWW &&
      /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)
    ) {
      // Each label should be max 63 at length (min: 1).
      // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
      // Each TLD should be up to 63 characters long (min: 2).
      // It is technically possible to have a single character TLD, but none currently exist.
      urlObject.hostname = urlObject.hostname.replace(/^www\./, "")
    }
  }

  // Remove query unwanted parameters
  if (Array.isArray(options.removeQueryParameters)) {
    // The keys are copied into an array first because entries are deleted while looping.
    // @ts-ignore
    for (const key of [...urlObject.searchParams.keys()]) {
      if (testParameter(key, options.removeQueryParameters)) {
        urlObject.searchParams.delete(key)
      }
    }
  }

  // `removeQueryParameters: true` drops the whole query unless `keepQueryParameters` is given.
  if (!Array.isArray(options.keepQueryParameters) && options.removeQueryParameters === true) {
    urlObject.search = ""
  }

  // Keep wanted query parameters
  if (Array.isArray(options.keepQueryParameters) && options.keepQueryParameters.length > 0) {
    // @ts-ignore
    for (const key of [...urlObject.searchParams.keys()]) {
      if (!testParameter(key, options.keepQueryParameters)) {
        urlObject.searchParams.delete(key)
      }
    }
  }

  // Sort query parameters
  if (options.sortQueryParameters) {
    urlObject.searchParams.sort()

    // Calling `.sort()` encodes the search parameters, so we need to decode them again.
    try {
      urlObject.search = decodeURIComponent(urlObject.search)
    } catch {
      // Best effort: keep the encoded query if it contains malformed escape sequences.
    }
  }

  if (options.removeTrailingSlash) {
    urlObject.pathname = urlObject.pathname.replace(/\/$/, "")
  }

  // Remove an explicit port number, excluding a default port number, if applicable
  if (options.removeExplicitPort && urlObject.port) {
    urlObject.port = ""
  }

  const oldUrlString = urlString

  // Take advantage of many of the Node `url` normalizations
  urlString = urlObject.toString()

  if (
    !options.removeSingleSlash &&
    urlObject.pathname === "/" &&
    !oldUrlString.endsWith("/") &&
    urlObject.hash === ""
  ) {
    urlString = urlString.replace(/\/$/, "")
  }

  // Remove ending `/` unless removeSingleSlash is false
  if (
    (options.removeTrailingSlash || urlObject.pathname === "/") &&
    urlObject.hash === "" &&
    options.removeSingleSlash
  ) {
    urlString = urlString.replace(/\/$/, "")
  }

  // Restore relative protocol, if applicable
  if (hasRelativeProtocol && !options.normalizeProtocol) {
    urlString = urlString.replace(/^http:\/\//, "//")
  }

  // Remove http/https
  if (options.stripProtocol) {
    urlString = urlString.replace(/^(?:https?:)?\/\//, "")
  }

  return urlString
}