Details in this pull request and commit, and in their source. In short: UTF-8 is well-designed, David Chan is awesome, and a regular expression will solve all our problems. * https://github.com/Financial-Times/polyfill-library/commit/0ece79ce32 * https://github.com/Financial-Times/polyfill-library/pull/1173 Bug: T103379 Bug: T207365 Change-Id: I7c4f9b6449a4317d68f4923fb4f198181bbfe800
584 lines
18 KiB
JavaScript
584 lines
18 KiB
JavaScript
/* global Symbol */
|
|
// URL Polyfill
|
|
// Draft specification: https://url.spec.whatwg.org
|
|
|
|
// Notes:
|
|
// - Primarily useful for parsing URLs and modifying query parameters
|
|
// - Should work in IE8+ and everything more modern, with es5.js polyfills
|
|
|
|
(function (global) {
|
|
'use strict';
|
|
|
|
/**
 * Tells whether a value should be treated as a sequence of entries.
 *
 * A truthy value qualifies when the environment exposes Symbol.iterator and
 * the value has a function under that key, or when it is a real array.
 * Falsy values (null, undefined, '', 0, ...) never qualify.
 *
 * @param {*} o Value to inspect.
 * @returns {boolean} true when `o` is iterable or an array.
 */
function isSequence(o) {
  if (!o) {
    return false;
  }
  // Only consult Symbol.iterator where the host actually provides it.
  var hostHasIterators = 'Symbol' in global && 'iterator' in global.Symbol;
  var isIterable = hostHasIterators && typeof o[Symbol.iterator] === 'function';
  return isIterable || Array.isArray(o);
}
|
|
|
|
;(function() { // eslint-disable-line no-extra-semi
|
|
|
|
// Browsers may have:
|
|
// * No global URL object
|
|
// * URL with static methods only - may have a dummy constructor
|
|
// * URL with members except searchParams
|
|
// * Full URL API support
|
|
var origURL = global.URL;
|
|
var nativeURL;
|
|
try {
|
|
if (origURL) {
|
|
nativeURL = new global.URL('http://example.com');
|
|
if ('searchParams' in nativeURL) {
|
|
var url = new URL('http://example.com');
|
|
url.search = 'a=1&b=2';
|
|
if (url.href === 'http://example.com/?a=1&b=2') {
|
|
url.search = '';
|
|
if (url.href === 'http://example.com/') {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
if (!('href' in nativeURL)) {
|
|
nativeURL = undefined;
|
|
}
|
|
nativeURL = undefined;
|
|
}
|
|
// eslint-disable-next-line no-empty
|
|
} catch (_) {}
|
|
|
|
// Serializes a list of {name, value} pairs as application/x-www-form-urlencoded.
// https://url.spec.whatwg.org/#concept-urlencoded-serializer
//
// NOTE: Doesn't do the encoding/decoding dance (output is always UTF-8).
//
// @param {Array<{name: string, value: string}>} pairs Entries to serialize, in order.
// @returns {string} 'name=value' items joined with '&'; '' for an empty list.
function urlencoded_serialize(pairs) {
  // encodeURIComponent leaves "!", "'", "(", ")" and "~" untouched, but the
  // urlencoded percent-encode set only exempts ASCII alphanumerics and
  // "*", "-", ".", "_". Escape the remaining five so the output matches what
  // native URLSearchParams#toString produces, then apply the space -> '+' rule.
  function encode(text) {
    return encodeURIComponent(text)
      .replace(/[!'()~]/g, function (ch) {
        return '%' + ch.charCodeAt(0).toString(16).toUpperCase();
      })
      .replace(/%20/g, '+');
  }

  var items = [];
  pairs.forEach(function (pair) {
    items.push(encode(pair.name) + '=' + encode(pair.value));
  });
  return items.join('&');
}
|
|
|
|
// https://url.spec.whatwg.org/#percent-decode
//
// Lazily-built pattern shared by every percent_decode call.
var cachedDecodePattern;

// Percent-decodes a string and interprets the decoded bytes as UTF-8.
//
// @param {string} bytes Text that may contain %XX escapes (either hex case).
// @returns {string} The text with every well-formed UTF-8 escape sequence
//   decoded, every other %XX escape replaced by U+FFFD, and anything that is
//   not a %XX escape (e.g. a bare '%' or '%2s') left as-is.
function percent_decode(bytes) {
  // This can't simply use decodeURIComponent (part of ECMAScript) as that's limited to
  // decoding to valid UTF-8 only. It throws URIError for literals that look like percent
  // encoding (e.g. `x=%`, `x=%a`, and `x=a%2sf`) and for non-UTF8 binary data that was
  // percent encoded and cannot be turned back into binary within a JavaScript string.
  //
  // The spec deals with this as follows:
  // * Read input as UTF-8 encoded bytes. This needs low-level access or a modern
  //   Web API, like TextDecoder. Old browsers don't have that, and it'd be a large
  //   dependency to add to this polyfill.
  // * For each percentage sign followed by two hex, blindly decode the byte in binary
  //   form. This would require TextEncoder to not corrupt multi-byte chars.
  // * Replace any bytes that would be invalid under UTF-8 with U+FFFD.
  //
  // Instead we:
  // * Use the fact that UTF-8 is designed to make validation easy in binary.
  //   You don't have to decode first. There are only a handful of valid prefixes and
  //   ranges, per RFC 3629. <https://datatracker.ietf.org/doc/html/rfc3629#section-4>
  // * Safely create multi-byte chars with decodeURIComponent, by only passing it
  //   valid and full characters (e.g. "%F0" separately from "%F0%9F%92%A9" throws).
  //   Anything else is kept as literal or replaced with U+FFFD, as per the URL spec.

  if (!cachedDecodePattern) {
    // In a UTF-8 multibyte sequence, non-initial bytes are always between %80 and %BF
    var uContinuation = '%[89AB][0-9A-F]';

    // The length of a UTF-8 sequence is specified by the first byte.
    //
    // One-byte sequences: 0xxxxxxx
    // So the byte is between %00 and %7F
    var u1Bytes = '%[0-7][0-9A-F]';

    // Two-byte sequences: 110xxxxx 10xxxxxx
    // The first byte is between %C2 and %DF. %C0 and %C1 could only encode
    // code points below U+0080 (overlong forms), which decodeURIComponent rejects.
    var u2Bytes = '%(?:C[2-9A-F]|D[0-9A-F])' + uContinuation;

    // Three-byte sequences: 1110xxxx 10xxxxxx 10xxxxxx
    // The first byte is between %E0 and %EF, with two restricted second bytes:
    //   %E0 needs %A0-%BF (anything lower is an overlong form)
    //   %ED needs %80-%9F (anything higher is a UTF-16 surrogate)
    var u3Bytes = [
      '%E0%[AB][0-9A-F]' + uContinuation,
      '%E[1-9A-CEF]' + uContinuation + uContinuation,
      '%ED%[89][0-9A-F]' + uContinuation
    ].join('|');

    // Four-byte sequences: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    // The first byte is between %F0 and %F4 (higher would exceed U+10FFFF):
    //   %F0 needs %90-%BF as second byte (anything lower is an overlong form)
    //   %F4 needs %80-%8F as second byte (anything higher exceeds U+10FFFF)
    var u4Bytes = [
      '%F0%[9AB][0-9A-F]' + uContinuation + uContinuation,
      '%F[1-3]' + uContinuation + uContinuation + uContinuation,
      '%F4%8[0-9A-F]' + uContinuation + uContinuation
    ].join('|');

    var anyByte = '%[0-9A-F][0-9A-F]';

    // Match some consecutive percent-escaped bytes. More precisely, match
    // 1-4 bytes that validly encode one character in UTF-8, or 1 byte that
    // would be invalid in UTF-8 in this location. Only the invalid byte is
    // captured, so the callback can tell the two cases apart.
    cachedDecodePattern = new RegExp(
      '(?:' + [u4Bytes, u3Bytes, u2Bytes, u1Bytes].join('|') + ')|(' + anyByte + ')',
      'gi'
    );
  }

  return bytes.replace(cachedDecodePattern, function (match, uBad) {
    // Truthiness rather than `!== undefined`: a capture group that did not
    // take part in the match is reported as undefined or as '' depending on
    // the engine, and a real capture here is never empty.
    return uBad ? '\uFFFD' : decodeURIComponent(match);
  });
}
|
|
|
|
// Parses an application/x-www-form-urlencoded string into {name, value} pairs.
// https://url.spec.whatwg.org/#concept-urlencoded-parser
//
// NOTE: Doesn't do the encoding/decoding dance
//
// @param {string} input Query text without a leading '?'.
// @param {boolean} [isindex] When truthy and the first chunk has no '=',
//   that chunk is treated as a value with an empty name.
// @returns {Array<{name: string, value: string}>} Decoded pairs in input
//   order; empty chunks (e.g. from '&&') are dropped.
function urlencoded_parse(input, isindex) {
  var chunks = input.split('&');
  if (isindex && chunks[0].indexOf('=') === -1) {
    chunks[0] = '=' + chunks[0];
  }

  return chunks
    .filter(function (chunk) {
      return chunk.length !== 0;
    })
    .map(function (chunk) {
      // Split on the first '=' only; a chunk without one is a bare name.
      var separatorAt = chunk.indexOf('=');
      var hasSeparator = separatorAt !== -1;
      var rawName = hasSeparator ? chunk.substring(0, separatorAt) : chunk;
      var rawValue = hasSeparator ? chunk.substring(separatorAt + 1) : '';

      // '+' means space in this format and must be translated before
      // percent-decoding so that an escaped '%2B' survives as a literal '+'.
      return {
        name: percent_decode(rawName.replace(/\+/g, ' ')),
        value: percent_decode(rawValue.replace(/\+/g, ' '))
      };
    });
}
|
|
|
|
// Creates the backing object that performs the actual URL parsing.
//
// @param {string} url URL text to parse.
// @returns {Object} An instance of the host's original URL constructor when
//   nativeURL is set; otherwise a detached <a> element whose href was
//   assigned `url`.
function URLUtils(url) {
  if (!nativeURL) {
    var link = document.createElement('a');
    link.href = url;
    return link;
  }
  return new origURL(url);
}
|
|
|
|
// URLSearchParams constructor.
//
// @param {*} [init] Initial content:
//   - undefined / null: start empty
//   - another URLSearchParams (this polyfill's): copy via its string form
//   - a sequence of [name, value] sequences
//   - any other object: own enumerable keys become names (record form)
//   - anything else: converted to a string, one leading '?' dropped, parsed
// @throws {TypeError} If a sequence entry is not itself a sequence of
//   exactly two items.
//
// Instance members used by the rest of the polyfill:
//   _list         ordered array of {name, value} pairs
//   _url_object   the URL this object belongs to, or null when standalone
//   _setList      replaces _list, ignored while an update is in flight
//   _update_steps writes the serialized list back to _url_object.search
function URLSearchParams(init) {
  var $this = this;
  this._list = [];

  if (init === undefined || init === null) {
    // no-op
  } else if (init instanceof URLSearchParams) {
    // In ES6 init would be a sequence, but special case for ES5.
    this._list = urlencoded_parse(String(init));
  } else if (typeof init === 'object' && isSequence(init)) {
    Array.from(init).forEach(function(e) {
      if (!isSequence(e)) throw TypeError();
      var nv = Array.from(e);
      if (nv.length !== 2) throw TypeError();
      $this._list.push({name: String(nv[0]), value: String(nv[1])});
    });
  } else if (typeof init === 'object' && init) {
    Object.keys(init).forEach(function(key) {
      $this._list.push({name: String(key), value: String(init[key])});
    });
  } else {
    init = String(init);
    if (init.substring(0, 1) === '?')
      init = init.substring(1);
    this._list = urlencoded_parse(init);
  }

  this._url_object = null;

  // True only while _update_steps is assigning to _url_object.search. That
  // assignment runs the owning URL's search setter, which calls back into
  // _setList and _update_steps; the flag turns both into no-ops so the list
  // being written out is not replaced or re-serialized recursively.
  var updating = false;

  this._setList = function (list) { if (!updating) $this._list = list; };

  this._update_steps = function() {
    // Check for an owner before raising the flag: a standalone object must
    // not leave `updating` stuck at true, which would silence _setList.
    if (updating || !$this._url_object) return;

    updating = true;
    try {
      // Partial workaround for IE issue with 'about:'
      if ($this._url_object.protocol === 'about:' &&
          $this._url_object.pathname.indexOf('?') !== -1) {
        $this._url_object.pathname = $this._url_object.pathname.split('?')[0];
      }

      $this._url_object.search = urlencoded_serialize($this._list);
    } finally {
      // Always lower the flag, even if a setter on the owner throws.
      updating = false;
    }
  };
}
|
|
|
|
|
|
// Public URLSearchParams methods. Every method works on this._list (an array
// of {name, value} pairs); mutating methods finish by calling
// this._update_steps() so an owning URL is kept in sync.
//
// Names and values passed in by callers are converted with String(), so the
// stored list only ever holds strings and lookups such as get(7) match an
// entry that was appended as '7'.
Object.defineProperties(URLSearchParams.prototype, {
  // Adds a new pair at the end of the list.
  append: {
    value: function (name, value) {
      this._list.push({ name: String(name), value: String(value) });
      this._update_steps();
    }, writable: true, enumerable: true, configurable: true
  },

  // Removes every pair with the given name.
  'delete': {
    value: function (name) {
      name = String(name);
      // No increment after a splice: the next element has moved into slot i.
      for (var i = 0; i < this._list.length;) {
        if (this._list[i].name === name)
          this._list.splice(i, 1);
        else
          ++i;
      }
      this._update_steps();
    }, writable: true, enumerable: true, configurable: true
  },

  // Returns the first value stored under the name, or null when absent.
  get: {
    value: function (name) {
      name = String(name);
      for (var i = 0; i < this._list.length; ++i) {
        if (this._list[i].name === name)
          return this._list[i].value;
      }
      return null;
    }, writable: true, enumerable: true, configurable: true
  },

  // Returns all values stored under the name, in list order.
  getAll: {
    value: function (name) {
      name = String(name);
      var result = [];
      for (var i = 0; i < this._list.length; ++i) {
        if (this._list[i].name === name)
          result.push(this._list[i].value);
      }
      return result;
    }, writable: true, enumerable: true, configurable: true
  },

  // Tells whether at least one pair has the given name.
  has: {
    value: function (name) {
      name = String(name);
      for (var i = 0; i < this._list.length; ++i) {
        if (this._list[i].name === name)
          return true;
      }
      return false;
    }, writable: true, enumerable: true, configurable: true
  },

  // Overwrites the first pair with the given name and drops the others;
  // appends a new pair when the name is not present.
  set: {
    value: function (name, value) {
      name = String(name);
      value = String(value);
      var found = false;
      for (var i = 0; i < this._list.length;) {
        if (this._list[i].name === name) {
          if (!found) {
            this._list[i].value = value;
            found = true;
            ++i;
          } else {
            this._list.splice(i, 1);
          }
        } else {
          ++i;
        }
      }

      if (!found)
        this._list.push({ name: name, value: value });

      this._update_steps();
    }, writable: true, enumerable: true, configurable: true
  },

  // Iterators over [name, value] arrays, names, and values respectively.
  entries: {
    value: function() { return new Iterator(this._list, 'key+value'); },
    writable: true, enumerable: true, configurable: true
  },

  keys: {
    value: function() { return new Iterator(this._list, 'key'); },
    writable: true, enumerable: true, configurable: true
  },

  values: {
    value: function() { return new Iterator(this._list, 'value'); },
    writable: true, enumerable: true, configurable: true
  },

  // Calls callback(value, name, searchParams) for each pair, with the
  // optional second argument as `this`.
  forEach: {
    value: function(callback) {
      var thisArg = (arguments.length > 1) ? arguments[1] : undefined;
      var params = this;
      this._list.forEach(function(pair) {
        callback.call(thisArg, pair.value, pair.name, params);
      });
    }, writable: true, enumerable: true, configurable: true
  },

  // Serializes the list as application/x-www-form-urlencoded.
  toString: {
    value: function () {
      return urlencoded_serialize(this._list);
    }, writable: true, enumerable: false, configurable: true
  },

  // Sorts the pairs by name (UTF-16 code unit order), keeping the relative
  // order of pairs that share a name.
  sort: {
    value: function sort() {
      var list = this._list;

      // Remember each pair's position and use it as tie-breaker, so the
      // result is stable even where Array.prototype.sort is not. Sorting the
      // pairs themselves (instead of grouping values in a plain object keyed
      // by name) also keeps names such as '__proto__' intact.
      var decorated = list.map(function (pair, position) {
        return { pair: pair, position: position };
      });
      decorated.sort(function (a, b) {
        if (a.pair.name < b.pair.name) return -1;
        if (a.pair.name > b.pair.name) return 1;
        return a.position - b.position;
      });

      // Write back in place so the array object referenced by this._list
      // stays the same one.
      for (var i = 0; i < decorated.length; i++) {
        list[i] = decorated[i].pair;
      }

      this._update_steps();
    }, writable: true, enumerable: true, configurable: true
  }
});
|
|
|
|
// Minimal iterator over a list of {name, value} pairs.
//
// @param {Array<{name: *, value: *}>} source List to walk. It is read on
//   every next() call, so entries added or removed later are observed.
// @param {string} kind 'key' yields names, 'value' yields values, anything
//   else yields [name, value] arrays.
function Iterator(source, kind) {
  var cursor = 0;

  // Picks the part of a pair that this iterator exposes.
  function project(pair) {
    if (kind === 'key') {
      return pair.name;
    }
    if (kind === 'value') {
      return pair.value;
    }
    return [pair.name, pair.value];
  }

  this.next = function() {
    if (cursor < source.length) {
      var current = source[cursor];
      cursor += 1;
      return {done: false, value: project(current)};
    }
    return {done: true, value: undefined};
  };
}
|
|
|
|
// Where the host provides Symbol.iterator, make both types usable with
// iteration protocols: a URLSearchParams iterates like entries(), and an
// Iterator returns itself.
if ('Symbol' in global && 'iterator' in global.Symbol) {
  var iteratorKey = global.Symbol.iterator;

  Object.defineProperty(URLSearchParams.prototype, iteratorKey, {
    value: URLSearchParams.prototype.entries,
    writable: true,
    enumerable: true,
    configurable: true
  });

  Object.defineProperty(Iterator.prototype, iteratorKey, {
    value: function() { return this; },
    writable: true,
    enumerable: true,
    configurable: true
  });
}
|
|
|
|
// URL constructor.
//
// @param {string} url Absolute URL, or a relative one when `base` is given.
// @param {string} [base] Base URL used to resolve `url`. Ignored when falsy.
// @returns {Object} `this` when ES5 accessors are available, otherwise an
//   <a> element carrying the same properties (IE8).
// @throws {TypeError} When invoked without `new`.
// @throws {Error} 'base not supported' when `base` is given and no scratch
//   document can be created to resolve against.
function URL(url, base) {
  if (!(this instanceof global.URL))
    throw new TypeError("Failed to construct 'URL': Please use the 'new' operator.");

  if (base) {
    // Resolve `url` against `base` up front; from here on `url` is whatever
    // href the resolver reports.
    url = (function () {
      if (nativeURL) return new origURL(url, base).href;
      var iframe;
      try {
        var doc;
        // Use another document/base tag/anchor for relative URL resolution, if possible
        if (Object.prototype.toString.call(window.operamini) === "[object OperaMini]") {
          iframe = document.createElement('iframe');
          iframe.style.display = 'none';
          document.documentElement.appendChild(iframe);
          doc = iframe.contentWindow.document;
        } else if (document.implementation && document.implementation.createHTMLDocument) {
          doc = document.implementation.createHTMLDocument('');
        } else if (document.implementation && document.implementation.createDocument) {
          doc = document.implementation.createDocument('http://www.w3.org/1999/xhtml', 'html', null);
          doc.documentElement.appendChild(doc.createElement('head'));
          doc.documentElement.appendChild(doc.createElement('body'));
        } else if (window.ActiveXObject) {
          doc = new window.ActiveXObject('htmlfile');
          doc.write('<head></head><body></body>');
          doc.close();
        }

        if (!doc) throw Error('base not supported');

        // A <base> in the scratch document makes its anchors resolve
        // relative hrefs against `base`.
        var baseTag = doc.createElement('base');
        baseTag.href = base;
        doc.getElementsByTagName('head')[0].appendChild(baseTag);
        var anchor = doc.createElement('a');
        anchor.href = url;
        return anchor.href;
      } finally {
        // The iframe only exists on the Opera Mini path; never leave it in
        // the page, even when resolution failed.
        if (iframe)
          iframe.parentNode.removeChild(iframe);
      }
    }());
  }

  // An inner object implementing URLUtils (either a native URL
  // object or an HTMLAnchorElement instance) is used to perform the
  // URL algorithms. With full ES5 getter/setter support, return a
  // regular object. For IE8's limited getter/setter support, a
  // different HTMLAnchorElement is returned with properties
  // overridden.

  var instance = URLUtils(url || '');

  // Detect for ES5 getter/setter support
  // (an Object.defineProperties polyfill that doesn't support getters/setters may throw)
  var ES5_GET_SET = (function() {
    if (!('defineProperties' in Object)) return false;
    try {
      var obj = {};
      Object.defineProperties(obj, { prop: { get: function () { return true; } } });
      return obj.prop;
    } catch (_) {
      return false;
    }
  }());

  var self = ES5_GET_SET ? this : document.createElement('a');

  // The query object starts from the parsed search string (without its
  // leading '?') and is tied to `self` so that its mutators write back
  // through the `search` setter defined below.
  var query_object = new URLSearchParams(
    instance.search ? instance.search.substring(1) : null);
  query_object._url_object = self;

  Object.defineProperties(self, {
    href: {
      get: function () { return instance.href; },
      set: function (v) { instance.href = v; tidy_instance(); update_steps(); },
      enumerable: true, configurable: true
    },
    origin: {
      get: function () {
        // NOTE(review): the URL Standard serializes an opaque origin as the
        // string 'null'; this returns the null value. Confirm what callers
        // expect before changing it.
        if (this.protocol.toLowerCase() === "data:") {
          return null;
        }

        if ('origin' in instance) return instance.origin;
        return this.protocol + '//' + this.host;
      },
      enumerable: true, configurable: true
    },
    protocol: {
      get: function () { return instance.protocol; },
      set: function (v) { instance.protocol = v; },
      enumerable: true, configurable: true
    },
    username: {
      get: function () { return instance.username; },
      set: function (v) { instance.username = v; },
      enumerable: true, configurable: true
    },
    password: {
      get: function () { return instance.password; },
      set: function (v) { instance.password = v; },
      enumerable: true, configurable: true
    },
    host: {
      get: function () {
        // IE returns default port in |host|
        var re = {'http:': /:80$/, 'https:': /:443$/, 'ftp:': /:21$/}[instance.protocol];
        return re ? instance.host.replace(re, '') : instance.host;
      },
      set: function (v) { instance.host = v; },
      enumerable: true, configurable: true
    },
    hostname: {
      get: function () { return instance.hostname; },
      set: function (v) { instance.hostname = v; },
      enumerable: true, configurable: true
    },
    port: {
      get: function () { return instance.port; },
      set: function (v) { instance.port = v; },
      enumerable: true, configurable: true
    },
    pathname: {
      get: function () {
        // IE does not include leading '/' in |pathname|
        if (instance.pathname.charAt(0) !== '/') return '/' + instance.pathname;
        return instance.pathname;
      },
      set: function (v) { instance.pathname = v; },
      enumerable: true, configurable: true
    },
    search: {
      get: function () { return instance.search; },
      set: function (v) {
        if (instance.search === v) return;
        instance.search = v; tidy_instance(); update_steps();
      },
      enumerable: true, configurable: true
    },
    searchParams: {
      get: function () { return query_object; },
      enumerable: true, configurable: true
    },
    hash: {
      get: function () { return instance.hash; },
      set: function (v) { instance.hash = v; tidy_instance(); },
      enumerable: true, configurable: true
    },
    toString: {
      value: function() { return instance.toString(); },
      enumerable: false, configurable: true
    },
    valueOf: {
      value: function() { return instance.valueOf(); },
      enumerable: false, configurable: true
    },
    // The URL Standard defines toJSON() as returning the serialized URL, so
    // JSON.stringify(url) yields the href string.
    toJSON: {
      value: function() { return instance.href; },
      enumerable: false, configurable: true
    }
  });

  // Removes an empty fragment or query marker from the backing object's
  // href: a trailing '#', a trailing '?', or a '?' directly before '#'.
  function tidy_instance() {
    var href = instance.href.replace(/#$|\?$|\?(?=#)/g, '');
    if (instance.href !== href)
      instance.href = href;
  }

  // Re-parses the backing object's search string into the query object and
  // then runs the query object's own update steps.
  function update_steps() {
    query_object._setList(instance.search ? urlencoded_parse(instance.search.substring(1)) : []);
    query_object._update_steps();
  }

  return self;
}
|
|
|
|
// Carry over the host URL object's own enumerable function members (its
// static methods) onto the replacement constructor.
if (origURL) {
  Object.keys(origURL).forEach(function (member) {
    if (typeof origURL[member] === 'function') {
      URL[member] = origURL[member];
    }
  });
}

// Publish the polyfilled constructors.
global.URL = URL;
global.URLSearchParams = URLSearchParams;
|
|
})();
|
|
|
|
// Patch native URLSearchParams constructor to handle sequences/records
// if necessary.
//
// When the current global.URLSearchParams already accepts both a sequence of
// pairs and a record, nothing is changed. Otherwise it is replaced by a thin
// wrapper that builds an instance of the original and fills it through
// append()/set().
(function() {
  if (new global.URLSearchParams([['a', 1]]).get('a') === '1' &&
      new global.URLSearchParams({a: 1}).get('a') === '1')
    return;

  var orig = global.URLSearchParams;

  global.URLSearchParams = function(init) {
    var o;

    // An existing params object goes straight to the original constructor.
    // Where it is not iterable it would otherwise fall into the record
    // branch below and come out empty, since it has no own enumerable keys.
    if (init instanceof orig) {
      return new orig(init);
    }

    if (init && typeof init === 'object' && isSequence(init)) {
      o = new orig();
      Array.from(init).forEach(function (e) {
        if (!isSequence(e)) throw TypeError();
        var nv = Array.from(e);
        if (nv.length !== 2) throw TypeError();
        o.append(nv[0], nv[1]);
      });
      return o;
    } else if (init && typeof init === 'object') {
      o = new orig();
      Object.keys(init).forEach(function(key) {
        o.set(key, init[key]);
      });
      return o;
    } else {
      return new orig(init);
    }
  };

  // The wrapper returns instances of `orig`; sharing the prototype keeps
  // `x instanceof URLSearchParams` true for them and keeps the methods
  // reachable through URLSearchParams.prototype.
  global.URLSearchParams.prototype = orig.prototype;
})();
|
|
|
|
}(self));
|