// Copied from https://github.com/sindresorhus/normalize-url
/* eslint-disable */

export type Options = {
  /**
  Protocol prepended to URLs that have no protocol or are protocol-relative.

  @default 'http'
  */
  readonly defaultProtocol?: "https" | "http"

  /**
  Prepends `defaultProtocol` to the URL if it's protocol-relative.

  @default true

  @example
  ```
  normalizeUrl('//sindresorhus.com');
  //=> 'http://sindresorhus.com'

  normalizeUrl('//sindresorhus.com', {normalizeProtocol: false});
  //=> '//sindresorhus.com'
  ```
  */
  readonly normalizeProtocol?: boolean

  /**
  Normalizes HTTPS URLs to HTTP.

  @default false

  @example
  ```
  normalizeUrl('https://sindresorhus.com');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://sindresorhus.com', {forceHttp: true});
  //=> 'http://sindresorhus.com'
  ```
  */
  readonly forceHttp?: boolean

  /**
  Normalizes HTTP URLs to HTTPS.

  This option cannot be used with the `forceHttp` option at the same time.

  @default false

  @example
  ```
  normalizeUrl('http://sindresorhus.com');
  //=> 'http://sindresorhus.com'

  normalizeUrl('http://sindresorhus.com', {forceHttps: true});
  //=> 'https://sindresorhus.com'
  ```
  */
  readonly forceHttps?: boolean

  /**
  Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of a URL.

  @default true

  @example
  ```
  normalizeUrl('https://user:password@sindresorhus.com');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://user:password@sindresorhus.com', {stripAuthentication: false});
  //=> 'https://user:password@sindresorhus.com'
  ```
  */
  readonly stripAuthentication?: boolean

  /**
  Removes hash from the URL.

  @default false

  @example
  ```
  normalizeUrl('sindresorhus.com/about.html#contact');
  //=> 'http://sindresorhus.com/about.html#contact'

  normalizeUrl('sindresorhus.com/about.html#contact', {stripHash: true});
  //=> 'http://sindresorhus.com/about.html'
  ```
  */
  readonly stripHash?: boolean

  /**
  Remove the protocol from the URL: `http://sindresorhus.com` → `sindresorhus.com`.

  It will only remove `https://` and `http://` protocols.

  @default false

  @example
  ```
  normalizeUrl('https://sindresorhus.com');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://sindresorhus.com', {stripProtocol: true});
  //=> 'sindresorhus.com'
  ```
  */
  readonly stripProtocol?: boolean

  /**
  Strip the [text fragment](https://web.dev/text-fragments/) part of the URL

  __Note:__ The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment.

  @default true

  @example
  ```
  normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello');
  //=> 'http://sindresorhus.com/about.html#'

  normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello');
  //=> 'http://sindresorhus.com/about.html#section'

  normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false});
  //=> 'http://sindresorhus.com/about.html#:~:text=hello'

  normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false});
  //=> 'http://sindresorhus.com/about.html#section:~:text=hello'
  ```
  */
  readonly stripTextFragment?: boolean

  /**
  Removes `www.` from the URL.

  @default true

  @example
  ```
  normalizeUrl('http://www.sindresorhus.com');
  //=> 'http://sindresorhus.com'

  normalizeUrl('http://www.sindresorhus.com', {stripWWW: false});
  //=> 'http://www.sindresorhus.com'
  ```
  */
  readonly stripWWW?: boolean

  /**
  Removes query parameters that matches any of the provided strings or regexes.

  @default [/^utm_\w+/i]

  @example
  ```
  normalizeUrl('www.sindresorhus.com?foo=bar&ref=test_ref', {
    removeQueryParameters: ['ref']
  });
  //=> 'http://sindresorhus.com/?foo=bar'
  ```

  If a boolean is provided, `true` will remove all the query parameters.

  ```
  normalizeUrl('www.sindresorhus.com?foo=bar', {
    removeQueryParameters: true
  });
  //=> 'http://sindresorhus.com'
  ```

  `false` will not remove any query parameter.

  ```
  normalizeUrl('www.sindresorhus.com?foo=bar&utm_medium=test&ref=test_ref', {
    removeQueryParameters: false
  });
  //=> 'http://sindresorhus.com/?foo=bar&ref=test_ref&utm_medium=test'
  ```
  */
  readonly removeQueryParameters?: ReadonlyArray<RegExp | string> | boolean

  /**
  Keeps only query parameters that matches any of the provided strings or regexes.

  __Note__: It overrides the `removeQueryParameters` option.

  @default undefined

  @example
  ```
  normalizeUrl('https://sindresorhus.com?foo=bar&ref=unicorn', {
    keepQueryParameters: ['ref']
  });
  //=> 'https://sindresorhus.com/?ref=unicorn'
  ```
  */
  readonly keepQueryParameters?: ReadonlyArray<RegExp | string>

  /**
  Removes trailing slash.

  __Note__: Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`.

  @default true

  @example
  ```
  normalizeUrl('http://sindresorhus.com/redirect/');
  //=> 'http://sindresorhus.com/redirect'

  normalizeUrl('http://sindresorhus.com/redirect/', {removeTrailingSlash: false});
  //=> 'http://sindresorhus.com/redirect/'

  normalizeUrl('http://sindresorhus.com/', {removeTrailingSlash: false});
  //=> 'http://sindresorhus.com'
  ```
  */
  readonly removeTrailingSlash?: boolean

  /**
  Remove a sole `/` pathname in the output. This option is independent of `removeTrailingSlash`.

  @default true

  @example
  ```
  normalizeUrl('https://sindresorhus.com/');
  //=> 'https://sindresorhus.com'

  normalizeUrl('https://sindresorhus.com/', {removeSingleSlash: false});
  //=> 'https://sindresorhus.com/'
  ```
  */
  readonly removeSingleSlash?: boolean

  /**
  Removes the default directory index file from path that matches any of the provided strings or regexes.
  When `true`, the regex `/^index\.[a-z]+$/` is used.

  @default false

  @example
  ```
  normalizeUrl('www.sindresorhus.com/foo/default.php', {
    removeDirectoryIndex: [/^default\.[a-z]+$/]
  });
  //=> 'http://sindresorhus.com/foo'
  ```
  */
  readonly removeDirectoryIndex?: boolean | ReadonlyArray<RegExp | string>

  /**
  Removes an explicit port number from the URL.

  Port 443 is always removed from HTTPS URLs and 80 is always removed from HTTP URLs regardless of this option.

  @default false

  @example
  ```
  normalizeUrl('sindresorhus.com:123', {
    removeExplicitPort: true
  });
  //=> 'http://sindresorhus.com'
  ```
  */
  readonly removeExplicitPort?: boolean

  /**
  Sorts the query parameters alphabetically by key.

  @default true

  @example
  ```
  normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', {
    sortQueryParameters: false
  });
  //=> 'http://sindresorhus.com/?b=two&a=one&c=three'
  ```
  */
  readonly sortQueryParameters?: boolean
}

// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
const DATA_URL_DEFAULT_MIME_TYPE = "text/plain"
const DATA_URL_DEFAULT_CHARSET = "us-ascii"

/**
 * Returns true when `name` matches at least one filter: RegExp filters are
 * tested against the name, any other filter must be strictly equal to it.
 */
const testParameter = (name: string, filters: ReadonlyArray<RegExp | string>) =>
  filters.some(filter => (filter instanceof RegExp ? filter.test(name) : filter === name))

const supportedProtocols = new Set(["https:", "http:", "file:"])

/**
 * Returns true when `urlString` parses as an absolute URL whose protocol is
 * not one of the supported ones. Unparseable input yields false.
 */
const hasCustomProtocol = (urlString: string) => {
  try {
    const {protocol} = new URL(urlString)
    // A "protocol" containing a dot is really a protocol-less `host.tld:port`
    // (e.g. `sindresorhus.com:123`), which must still be normalized.
    return protocol.endsWith(":") && !protocol.includes(".") && !supportedProtocols.has(protocol)
  } catch {
    // `new URL` throws on input without a protocol: not a custom protocol.
    return false
  }
}

/**
 * Normalizes a `data:` URL: lowercases the MIME type and charset, drops the
 * default MIME type and default charset, trims base64 payloads and optionally
 * removes the hash.
 *
 * @throws {Error} If `urlString` does not have the `data:<type>,<data>` shape.
 */
const normalizeDataURL = (urlString: string, {stripHash}: {stripHash: boolean}) => {
  const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/.exec(urlString)

  if (!match) {
    throw new Error(`Invalid URL: ${urlString}`)
  }

  let {type, data, hash} = match.groups as any
  const mediaType = type.split(";")
  hash = stripHash ? "" : hash

  // A trailing `base64` token is an encoding marker, not a media type attribute.
  let isBase64 = false
  if (mediaType[mediaType.length - 1] === "base64") {
    mediaType.pop()
    isBase64 = true
  }

  // Lowercase MIME type
  const mimeType = mediaType.shift()?.toLowerCase() ?? ""
  const attributes = mediaType
    .map((attribute: string) => {
      let [key, value = ""] = attribute.split("=").map((s: string) => s.trim())

      // Lowercase `charset`
      if (key === "charset") {
        value = value.toLowerCase()

        // The default charset is implied, so it is dropped entirely.
        if (value === DATA_URL_DEFAULT_CHARSET) {
          return ""
        }
      }

      return `${key}${value ? `=${value}` : ""}`
    })
    .filter(Boolean)

  const normalizedMediaType = [...attributes]

  if (isBase64) {
    normalizedMediaType.push("base64")
  }

  // The MIME type may only be omitted when it is the default one and nothing
  // else (attributes or `base64`) follows it.
  if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
    normalizedMediaType.unshift(mimeType)
  }

  return `data:${normalizedMediaType.join(";")},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ""}`
}

/**
[Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL.

URLs with custom protocols are not normalized and just passed through by default. Supported protocols are: `https`, `http`, `file`, and `data`.

@param urlString - URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs).
@param opts - Normalization options; omitted options use the defaults documented on `Options`.
@returns The normalized URL string.
@throws {Error} If both `forceHttp` and `forceHttps` are set, or if a data URL is malformed.
@throws {TypeError} If the URL cannot be parsed by the `URL` constructor.

@example
```
import normalizeUrl from 'normalize-url';

normalizeUrl('sindresorhus.com');
//=> 'http://sindresorhus.com'

normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo');
//=> 'http://sindresorhus.com/baz?a=foo&b=bar'
```
*/
export default function normalizeUrl(urlString: string, opts?: Options): string {
  const options = {
    defaultProtocol: "http",
    normalizeProtocol: true,
    forceHttp: false,
    forceHttps: false,
    stripAuthentication: true,
    stripHash: false,
    stripTextFragment: true,
    stripWWW: true,
    removeQueryParameters: [/^utm_\w+/i],
    removeTrailingSlash: true,
    removeSingleSlash: true,
    removeDirectoryIndex: false,
    removeExplicitPort: false,
    sortQueryParameters: true,
    ...opts,
  }

  // Legacy: Append `:` to the protocol if missing.
  if (typeof options.defaultProtocol === "string" && !options.defaultProtocol.endsWith(":")) {
    options.defaultProtocol = `${options.defaultProtocol}:`
  }

  urlString = urlString.trim()

  // Data URL
  if (/^data:/i.test(urlString)) {
    return normalizeDataURL(urlString, options)
  }

  if (hasCustomProtocol(urlString)) {
    return urlString
  }

  const hasRelativeProtocol = urlString.startsWith("//")
  const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString)

  // Prepend protocol
  if (!isRelativeUrl) {
    urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol)
  }

  const urlObject = new URL(urlString)

  if (options.forceHttp && options.forceHttps) {
    throw new Error("The `forceHttp` and `forceHttps` options cannot be used together")
  }

  if (options.forceHttp && urlObject.protocol === "https:") {
    urlObject.protocol = "http:"
  }

  if (options.forceHttps && urlObject.protocol === "http:") {
    urlObject.protocol = "https:"
  }

  // Remove auth
  if (options.stripAuthentication) {
    urlObject.username = ""
    urlObject.password = ""
  }

  // Remove hash
  if (options.stripHash) {
    urlObject.hash = ""
  } else if (options.stripTextFragment) {
    urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, "")
  }

  // Remove duplicate slashes if not preceded by a protocol
  // NOTE: This could be implemented using a single negative lookbehind
  // regex, but we avoid that to maintain compatibility with older js engines
  // which do not have support for that feature.
  if (urlObject.pathname) {
    // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind.

    // Split the string by occurrences of this protocol regex, and perform
    // duplicate-slash replacement on the strings between those occurrences
    // (if any).
    const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g

    let lastIndex = 0
    let result = ""
    for (;;) {
      const match = protocolRegex.exec(urlObject.pathname)
      if (!match) {
        break
      }

      const protocol = match[0]
      const protocolAtIndex = match.index
      const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex)

      result += intermediate.replace(/\/{2,}/g, "/")
      result += protocol
      lastIndex = protocolAtIndex + protocol.length
    }

    const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length)
    result += remnant.replace(/\/{2,}/g, "/")

    urlObject.pathname = result
  }

  // Decode URI octets
  if (urlObject.pathname) {
    try {
      urlObject.pathname = decodeURI(urlObject.pathname)
    } catch {
      // Best effort: keep the pathname as-is when it holds malformed escapes.
    }
  }

  // Remove directory index
  if (options.removeDirectoryIndex === true) {
    options.removeDirectoryIndex = [/^index\.[a-z]+$/]
  }

  if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
    let pathComponents = urlObject.pathname.split("/")
    const lastComponent = pathComponents[pathComponents.length - 1]

    if (testParameter(lastComponent, options.removeDirectoryIndex)) {
      pathComponents = pathComponents.slice(0, -1)
      urlObject.pathname = pathComponents.slice(1).join("/") + "/"
    }
  }

  if (urlObject.hostname) {
    // Remove trailing dot
    urlObject.hostname = urlObject.hostname.replace(/\.$/, "")

    // Remove `www.`
    if (
      options.stripWWW &&
      /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)
    ) {
      // Each label should be max 63 at length (min: 1).
      // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
      // Each TLD should be up to 63 characters long (min: 2).
      // It is technically possible to have a single character TLD, but none currently exist.
      urlObject.hostname = urlObject.hostname.replace(/^www\./, "")
    }
  }

  // Remove query unwanted parameters
  if (Array.isArray(options.removeQueryParameters)) {
    // The keys are copied first because entries are deleted while iterating.
    // @ts-ignore
    for (const key of [...urlObject.searchParams.keys()]) {
      if (testParameter(key, options.removeQueryParameters)) {
        urlObject.searchParams.delete(key)
      }
    }
  }

  if (!Array.isArray(options.keepQueryParameters) && options.removeQueryParameters === true) {
    urlObject.search = ""
  }

  // Keep wanted query parameters
  if (Array.isArray(options.keepQueryParameters) && options.keepQueryParameters.length > 0) {
    // @ts-ignore
    for (const key of [...urlObject.searchParams.keys()]) {
      if (!testParameter(key, options.keepQueryParameters)) {
        urlObject.searchParams.delete(key)
      }
    }
  }

  // Sort query parameters
  if (options.sortQueryParameters) {
    urlObject.searchParams.sort()

    // Calling `.sort()` encodes the search parameters, so we need to decode them again.
    try {
      urlObject.search = decodeURIComponent(urlObject.search)
    } catch {
      // Best effort: keep the encoded query when it cannot be decoded.
    }
  }

  if (options.removeTrailingSlash) {
    urlObject.pathname = urlObject.pathname.replace(/\/$/, "")
  }

  // Remove an explicit port number, excluding a default port number, if applicable
  if (options.removeExplicitPort && urlObject.port) {
    urlObject.port = ""
  }

  const oldUrlString = urlString

  // Take advantage of many of the Node `url` normalizations
  urlString = urlObject.toString()

  // `URL` always serializes an empty path as `/`; drop it again when the
  // caller's input did not end with a slash.
  if (
    !options.removeSingleSlash &&
    urlObject.pathname === "/" &&
    !oldUrlString.endsWith("/") &&
    urlObject.hash === ""
  ) {
    urlString = urlString.replace(/\/$/, "")
  }

  // Remove ending `/` unless removeSingleSlash is false
  if (
    (options.removeTrailingSlash || urlObject.pathname === "/") &&
    urlObject.hash === "" &&
    options.removeSingleSlash
  ) {
    urlString = urlString.replace(/\/$/, "")
  }

  // Restore relative protocol, if applicable. The protocol may be `https:`
  // when `defaultProtocol` is `https` or `forceHttps` is set.
  if (hasRelativeProtocol && !options.normalizeProtocol) {
    urlString = urlString.replace(/^https?:\/\//, "//")
  }

  // Remove http/https
  if (options.stripProtocol) {
    urlString = urlString.replace(/^(?:https?:)?\/\//, "")
  }

  return urlString
}