594 lines
16 KiB
TypeScript
594 lines
16 KiB
TypeScript
// Copied from https://github.com/sindresorhus/normalize-url
|
|
/* eslint-disable */
|
|
|
|
/**
Options accepted by `normalizeUrl`. Every option is optional; the documented
defaults are the values `normalizeUrl` uses when an option is omitted.
*/
export type Options = {
  /**
  Protocol prepended to URLs that have no protocol or are protocol-relative (`//host`).

  @default 'http'
  */
  readonly defaultProtocol?: "https" | "http"

  /**
  Prepends `defaultProtocol` to the URL if it's protocol-relative.

  @default true

  @example
  ```
  normalizeUrl('//sindresorhus.com');
  //=> 'http://sindresorhus.com'

  normalizeUrl('//sindresorhus.com', {normalizeProtocol: false});
  //=> '//sindresorhus.com'
  ```
  */
  readonly normalizeProtocol?: boolean

  /**
  Normalizes HTTPS URLs to HTTP.

  This option cannot be used with the `forceHttps` option at the same time.

  @default false

  @example
  ```
  normalizeUrl('https://sindresorhus.com');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://sindresorhus.com', {forceHttp: true});
  //=> 'http://sindresorhus.com'
  ```
  */
  readonly forceHttp?: boolean

  /**
  Normalizes HTTP URLs to HTTPS.

  This option cannot be used with the `forceHttp` option at the same time.

  @default false

  @example
  ```
  normalizeUrl('http://sindresorhus.com');
  //=> 'http://sindresorhus.com'

  normalizeUrl('http://sindresorhus.com', {forceHttps: true});
  //=> 'https://sindresorhus.com'
  ```
  */
  readonly forceHttps?: boolean

  /**
  Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of a URL.

  @default true

  @example
  ```
  normalizeUrl('https://user:password@sindresorhus.com');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://user:password@sindresorhus.com', {stripAuthentication: false});
  //=> 'https://user:password@sindresorhus.com'
  ```
  */
  readonly stripAuthentication?: boolean

  /**
  Removes hash from the URL.

  @default false

  @example
  ```
  normalizeUrl('sindresorhus.com/about.html#contact');
  //=> 'http://sindresorhus.com/about.html#contact'

  normalizeUrl('sindresorhus.com/about.html#contact', {stripHash: true});
  //=> 'http://sindresorhus.com/about.html'
  ```
  */
  readonly stripHash?: boolean

  /**
  Remove the protocol from the URL: `http://sindresorhus.com` → `sindresorhus.com`.

  It will only remove `https://` and `http://` protocols.

  @default false

  @example
  ```
  normalizeUrl('https://sindresorhus.com');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://sindresorhus.com', {stripProtocol: true});
  //=> 'sindresorhus.com'
  ```
  */
  readonly stripProtocol?: boolean

  /**
  Strip the [text fragment](https://web.dev/text-fragments/) part of the URL.

  __Note:__ The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment.

  @default true

  @example
  ```
  normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello');
  //=> 'http://sindresorhus.com/about.html'

  normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello');
  //=> 'http://sindresorhus.com/about.html#section'

  normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false});
  //=> 'http://sindresorhus.com/about.html#:~:text=hello'

  normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false});
  //=> 'http://sindresorhus.com/about.html#section:~:text=hello'
  ```
  */
  readonly stripTextFragment?: boolean

  /**
  Removes `www.` from the URL.

  @default true

  @example
  ```
  normalizeUrl('http://www.sindresorhus.com');
  //=> 'http://sindresorhus.com'

  normalizeUrl('http://www.sindresorhus.com', {stripWWW: false});
  //=> 'http://www.sindresorhus.com'
  ```
  */
  readonly stripWWW?: boolean

  /**
  Removes query parameters that match any of the provided strings or regexes.

  @default [/^utm_\w+/i]

  @example
  ```
  normalizeUrl('www.sindresorhus.com?foo=bar&ref=test_ref', {
    removeQueryParameters: ['ref']
  });
  //=> 'http://sindresorhus.com/?foo=bar'
  ```

  If a boolean is provided, `true` will remove all the query parameters.

  ```
  normalizeUrl('www.sindresorhus.com?foo=bar', {
    removeQueryParameters: true
  });
  //=> 'http://sindresorhus.com'
  ```

  `false` will not remove any query parameter.

  ```
  normalizeUrl('www.sindresorhus.com?foo=bar&utm_medium=test&ref=test_ref', {
    removeQueryParameters: false
  });
  //=> 'http://sindresorhus.com/?foo=bar&ref=test_ref&utm_medium=test'
  ```
  */
  readonly removeQueryParameters?: ReadonlyArray<RegExp | string> | boolean

  /**
  Keeps only query parameters that match any of the provided strings or regexes.

  An empty array is ignored.

  __Note__: When this option is set, `removeQueryParameters: true` is ignored. Array filters in `removeQueryParameters` (including the default `utm_*` filter) are still applied first, so pass `removeQueryParameters: false` to keep such parameters.

  @default undefined

  @example
  ```
  normalizeUrl('https://sindresorhus.com?foo=bar&ref=unicorn', {
    keepQueryParameters: ['ref']
  });
  //=> 'https://sindresorhus.com/?ref=unicorn'
  ```
  */
  readonly keepQueryParameters?: ReadonlyArray<RegExp | string>

  /**
  Removes trailing slash.

  __Note__: Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`.

  @default true

  @example
  ```
  normalizeUrl('http://sindresorhus.com/redirect/');
  //=> 'http://sindresorhus.com/redirect'

  normalizeUrl('http://sindresorhus.com/redirect/', {removeTrailingSlash: false});
  //=> 'http://sindresorhus.com/redirect/'

  normalizeUrl('http://sindresorhus.com/', {removeTrailingSlash: false});
  //=> 'http://sindresorhus.com'
  ```
  */
  readonly removeTrailingSlash?: boolean

  /**
  Remove a sole `/` pathname in the output. This option is independent of `removeTrailingSlash`.

  @default true

  @example
  ```
  normalizeUrl('https://sindresorhus.com/');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://sindresorhus.com/', {removeSingleSlash: false});
  //=> 'https://sindresorhus.com/'
  ```
  */
  readonly removeSingleSlash?: boolean

  /**
  Removes the default directory index file from a path when its last segment matches any of the provided strings or regexes.
  When `true`, the regex `/^index\.[a-z]+$/` is used.

  @default false

  @example
  ```
  normalizeUrl('www.sindresorhus.com/foo/default.php', {
    removeDirectoryIndex: [/^default\.[a-z]+$/]
  });
  //=> 'http://sindresorhus.com/foo'
  ```
  */
  readonly removeDirectoryIndex?: boolean | ReadonlyArray<RegExp | string>

  /**
  Removes an explicit port number from the URL.

  Port 443 is always removed from HTTPS URLs and 80 is always removed from HTTP URLs regardless of this option.

  @default false

  @example
  ```
  normalizeUrl('http://sindresorhus.com:123', {
    removeExplicitPort: true
  });
  //=> 'http://sindresorhus.com'
  ```
  */
  readonly removeExplicitPort?: boolean

  /**
  Sorts the query parameters alphabetically by key.

  @default true

  @example
  ```
  normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', {
    sortQueryParameters: false
  });
  //=> 'http://sindresorhus.com/?b=two&a=one&c=three'
  ```
  */
  readonly sortQueryParameters?: boolean
}
|
|
|
|
// Defaults that a data URL implies when they are omitted; the data-URL
// normalizer drops them from its output when they are spelled out explicitly.
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
const DATA_URL_DEFAULT_MIME_TYPE = "text/plain"
const DATA_URL_DEFAULT_CHARSET = "us-ascii"
|
|
|
|
/**
Check whether `name` is matched by at least one filter.

@param name - The value to test (a query parameter key or a path segment).
@param filters - Strings are compared with strict equality; regexes are tested against `name`.
@returns `true` if any filter matches, otherwise `false` (including for an empty filter list).
*/
const testParameter = (name: string, filters: any[]) =>
  filters.some(filter => {
    if (filter instanceof RegExp) {
      // A global or sticky regex remembers `lastIndex` between `.test()` calls,
      // which would make repeated checks with the same filter miss matches.
      filter.lastIndex = 0
      return filter.test(name)
    }

    return filter === name
  })
|
|
|
|
// Protocols (as reported by `URL#protocol`, including the trailing colon) that get normalized.
const supportedProtocols = new Set(["https:", "http:", "file:"])

/**
Decide whether `urlString` uses a protocol that should be passed through untouched.

@param urlString - The raw URL string.
@returns `true` when the string parses as a URL whose protocol is not one of the supported ones; `false` otherwise, including when the string cannot be parsed on its own.
*/
const hasCustomProtocol = (urlString: string) => {
  try {
    const {protocol} = new URL(urlString)

    // `host.tld:port` input (for example `sindresorhus.com:123`) parses with the
    // host as its scheme. A dot in the "protocol" marks it as a host name, not a
    // custom protocol. Dot-less hosts such as `localhost:3000` are still
    // indistinguishable from a scheme and remain classified as custom.
    return protocol.endsWith(":") && !protocol.includes(".") && !supportedProtocols.has(protocol)
  } catch {
    // Not parseable without a base URL, so there is no protocol at all.
    return false
  }
}
|
|
|
|
/**
Normalize a `data:` URL.

The MIME type and the `charset` value are lowercased, the default charset and
the default MIME type are dropped when nothing else requires them, whitespace
around attribute keys and values is trimmed, and base64 payloads are trimmed.

@param urlString - The data URL to normalize. The `data:` scheme is matched case-insensitively and emitted in lowercase.
@param options - `stripHash`: when `true`, the `#hash` part is removed.
@returns The normalized data URL.
@throws {Error} If `urlString` does not have the `data:<type>,<data>` shape.
*/
const normalizeDataURL = (urlString: string, {stripHash}: {stripHash: boolean}) => {
  // The `i` flag keeps this in step with callers that detect data URLs
  // case-insensitively; URL schemes are not case-sensitive.
  const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/i.exec(urlString)

  if (!match) {
    throw new Error(`Invalid URL: ${urlString}`)
  }

  let {type, data, hash} = match.groups as any
  const mediaType = type.split(";")
  hash = stripHash ? "" : hash

  // `base64` is only an encoding marker when it is the last segment.
  let isBase64 = false
  if (mediaType[mediaType.length - 1] === "base64") {
    mediaType.pop()
    isBase64 = true
  }

  // Lowercase MIME type
  const mimeType = mediaType.shift()?.toLowerCase() ?? ""
  const attributes = mediaType
    .map((attribute: string) => {
      let [key, value = ""] = attribute.split("=").map((s: string) => s.trim())

      // Lowercase `charset`
      if (key === "charset") {
        value = value.toLowerCase()

        // The default charset is implied, so it is dropped entirely.
        if (value === DATA_URL_DEFAULT_CHARSET) {
          return ""
        }
      }

      return `${key}${value ? `=${value}` : ""}`
    })
    .filter(Boolean)

  const normalizedMediaType = [...attributes]

  if (isBase64) {
    normalizedMediaType.push("base64")
  }

  // The MIME type is only written when something follows it or when it differs from the default.
  if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
    normalizedMediaType.unshift(mimeType)
  }

  return `data:${normalizedMediaType.join(";")},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ""}`
}
|
|
|
|
/**
[Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL.

URLs with custom protocols are not normalized and just passed through by default. Supported protocols are: `https`, `http`, `file`, and `data`.

@param urlString - URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs). Surrounding whitespace is trimmed.
@param opts - Normalization options; omitted options fall back to the defaults listed in the function body.
@returns The normalized URL string.
@throws {Error} If both `forceHttp` and `forceHttps` are set, or if a data URL is malformed.
@throws {TypeError} If the URL cannot be parsed by `URL` (this includes path-relative input such as `/foo`).

@example
```
import normalizeUrl from 'normalize-url';

normalizeUrl('sindresorhus.com');
//=> 'http://sindresorhus.com'

normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo');
//=> 'http://sindresorhus.com/baz?a=foo&b=bar'
```
*/
export default function normalizeUrl(urlString: string, opts?: Options): string {
  const options = {
    defaultProtocol: "http",
    normalizeProtocol: true,
    forceHttp: false,
    forceHttps: false,
    stripAuthentication: true,
    stripHash: false,
    stripTextFragment: true,
    stripWWW: true,
    removeQueryParameters: [/^utm_\w+/i],
    removeTrailingSlash: true,
    removeSingleSlash: true,
    removeDirectoryIndex: false,
    removeExplicitPort: false,
    sortQueryParameters: true,
    ...opts,
  }

  // Legacy: Append `:` to the protocol if missing.
  if (typeof options.defaultProtocol === "string" && !options.defaultProtocol.endsWith(":")) {
    options.defaultProtocol = `${options.defaultProtocol}:`
  }

  urlString = urlString.trim()

  // Data URL
  if (/^data:/i.test(urlString)) {
    return normalizeDataURL(urlString, options)
  }

  // Custom protocols are passed through untouched.
  if (hasCustomProtocol(urlString)) {
    return urlString
  }

  const hasRelativeProtocol = urlString.startsWith("//")
  const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString)

  // Prepend protocol
  // The replacement is `<protocol>:` without slashes (a leading `//` is consumed);
  // the URL parser accepts that form for http(s).
  if (!isRelativeUrl) {
    urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol)
  }

  const urlObject = new URL(urlString)

  if (options.forceHttp && options.forceHttps) {
    throw new Error("The `forceHttp` and `forceHttps` options cannot be used together")
  }

  if (options.forceHttp && urlObject.protocol === "https:") {
    urlObject.protocol = "http:"
  }

  if (options.forceHttps && urlObject.protocol === "http:") {
    urlObject.protocol = "https:"
  }

  // Remove auth
  if (options.stripAuthentication) {
    urlObject.username = ""
    urlObject.password = ""
  }

  // Remove hash
  if (options.stripHash) {
    urlObject.hash = ""
  } else if (options.stripTextFragment) {
    urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, "")
  }

  // Remove duplicate slashes if not preceded by a protocol
  // NOTE: This could be implemented using a single negative lookbehind
  // regex, but we avoid that to maintain compatibility with older js engines
  // which do not have support for that feature.
  if (urlObject.pathname) {
    // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind.

    // Split the string by occurrences of this protocol regex, and perform
    // duplicate-slash replacement on the strings between those occurrences
    // (if any).
    const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g

    let lastIndex = 0
    let result = ""
    for (;;) {
      const match = protocolRegex.exec(urlObject.pathname)
      if (!match) {
        break
      }

      const protocol = match[0]
      const protocolAtIndex = match.index
      const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex)

      result += intermediate.replace(/\/{2,}/g, "/")
      result += protocol
      lastIndex = protocolAtIndex + protocol.length
    }

    const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length)
    result += remnant.replace(/\/{2,}/g, "/")

    urlObject.pathname = result
  }

  // Decode URI octets
  if (urlObject.pathname) {
    try {
      urlObject.pathname = decodeURI(urlObject.pathname)
    } catch {
      // Best effort: a malformed escape sequence leaves the pathname as it is.
    }
  }

  // Remove directory index
  if (options.removeDirectoryIndex === true) {
    options.removeDirectoryIndex = [/^index\.[a-z]+$/]
  }

  if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
    let pathComponents = urlObject.pathname.split("/")
    const lastComponent = pathComponents[pathComponents.length - 1]

    if (testParameter(lastComponent, options.removeDirectoryIndex)) {
      pathComponents = pathComponents.slice(0, -1)
      urlObject.pathname = pathComponents.slice(1).join("/") + "/"
    }
  }

  if (urlObject.hostname) {
    // Remove trailing dot
    urlObject.hostname = urlObject.hostname.replace(/\.$/, "")

    // Remove `www.`
    if (
      options.stripWWW &&
      /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)
    ) {
      // Each label should be max 63 at length (min: 1).
      // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
      // Each TLD should be up to 63 characters long (min: 2).
      // It is technically possible to have a single character TLD, but none currently exist.
      urlObject.hostname = urlObject.hostname.replace(/^www\./, "")
    }
  }

  // Remove query unwanted parameters
  if (Array.isArray(options.removeQueryParameters)) {
    // The keys are copied first because entries are deleted while iterating.
    // @ts-ignore
    for (const key of [...urlObject.searchParams.keys()]) {
      if (testParameter(key, options.removeQueryParameters)) {
        urlObject.searchParams.delete(key)
      }
    }
  }

  if (!Array.isArray(options.keepQueryParameters) && options.removeQueryParameters === true) {
    urlObject.search = ""
  }

  // Keep wanted query parameters
  if (Array.isArray(options.keepQueryParameters) && options.keepQueryParameters.length > 0) {
    // @ts-ignore
    for (const key of [...urlObject.searchParams.keys()]) {
      if (!testParameter(key, options.keepQueryParameters)) {
        urlObject.searchParams.delete(key)
      }
    }
  }

  // Sort query parameters
  if (options.sortQueryParameters) {
    urlObject.searchParams.sort()

    // Calling `.sort()` encodes the search parameters, so we need to decode them again.
    // NOTE(review): decoding the whole query string also decodes `%26`/`%3D`
    // inside values into literal `&`/`=` — confirm this is acceptable for callers.
    try {
      urlObject.search = decodeURIComponent(urlObject.search)
    } catch {
      // Best effort: a malformed escape sequence leaves the encoded query as it is.
    }
  }

  if (options.removeTrailingSlash) {
    urlObject.pathname = urlObject.pathname.replace(/\/$/, "")
  }

  // Remove an explicit port number, excluding a default port number, if applicable
  if (options.removeExplicitPort && urlObject.port) {
    urlObject.port = ""
  }

  const oldUrlString = urlString

  // Take advantage of many of the Node `url` normalizations
  urlString = urlObject.toString()

  // The parser always emits a `/` path; drop it again when the input had none.
  if (
    !options.removeSingleSlash &&
    urlObject.pathname === "/" &&
    !oldUrlString.endsWith("/") &&
    urlObject.hash === ""
  ) {
    urlString = urlString.replace(/\/$/, "")
  }

  // Remove ending `/` unless removeSingleSlash is false
  if (
    (options.removeTrailingSlash || urlObject.pathname === "/") &&
    urlObject.hash === "" &&
    options.removeSingleSlash
  ) {
    urlString = urlString.replace(/\/$/, "")
  }

  // Restore relative protocol, if applicable
  if (hasRelativeProtocol && !options.normalizeProtocol) {
    // Undo the prefix that was prepended above so that `defaultProtocol: "https"`
    // is restored as well. The `http://` fallback keeps the earlier behavior for
    // every input that was already handled.
    if (urlString.startsWith(`${options.defaultProtocol}//`)) {
      urlString = urlString.slice(options.defaultProtocol.length)
    } else {
      urlString = urlString.replace(/^http:\/\//, "//")
    }
  }

  // Remove http/https
  if (options.stripProtocol) {
    urlString = urlString.replace(/^(?:https?:)?\/\//, "")
  }

  return urlString
}
|