123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
/*! https://mths.be/utf8js v2.1.2 by @mathias */
// Local alias for String.fromCharCode.
var stringFromCharCode = String.fromCharCode;
// Taken from https://mths.be/punycode
/**
 * Splits a JavaScript string into an array of numeric code points.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one supplementary code point. Any other code unit, including an unmatched
 * surrogate, is emitted unchanged as its own entry.
 *
 * @param {string} string The string to split.
 * @returns {number[]} The code points, in input order.
 */
function ucs2decode(string) {
  var output = [];
  var counter = 0;
  var length = string.length;
  var value;
  var extra;
  while (counter < length) {
    value = string.charCodeAt(counter++);
    if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
      // high surrogate, and there is a next character
      extra = string.charCodeAt(counter++);
      if ((extra & 0xFC00) === 0xDC00) { // low surrogate
        output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
      } else {
        // unmatched surrogate; only append this code unit, in case the next
        // code unit is the high surrogate of a surrogate pair
        output.push(value);
        counter--;
      }
    } else {
      output.push(value);
    }
  }
  return output;
}
// Taken from https://mths.be/punycode
/**
 * Builds a JavaScript string from an array of numeric code points.
 *
 * Values above 0xFFFF are written as a high/low surrogate pair; every other
 * value is written as a single code unit.
 *
 * @param {number[]} array The code points to join.
 * @returns {string} The resulting string.
 */
function ucs2encode(array) {
  var length = array.length;
  var output = '';
  var value;
  for (var index = 0; index < length; index++) {
    value = array[index];
    if (value > 0xFFFF) {
      // Supplementary code point: emit the high surrogate now and leave the
      // low surrogate in `value` for the shared append below.
      value -= 0x10000;
      output += stringFromCharCode(((value >>> 10) & 0x3FF) | 0xD800);
      value = 0xDC00 | (value & 0x3FF);
    }
    output += stringFromCharCode(value);
  }
  return output;
}
/**
 * Tests whether a code point lies outside the surrogate range
 * U+D800..U+DFFF.
 *
 * @param {number} codePoint The code point to test.
 * @param {boolean} strict When truthy, a surrogate code point throws instead
 *   of returning false.
 * @returns {boolean} true when the code point is not a surrogate; false when
 *   it is a surrogate and `strict` is falsy.
 * @throws {Error} When the code point is a surrogate and `strict` is truthy.
 */
function checkScalarValue(codePoint, strict) {
  var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
  if (!isSurrogate) {
    return true;
  }
  if (strict) {
    throw new Error(
      'Lone surrogate U+' + codePoint.toString(16).toUpperCase() +
      ' is not a scalar value'
    );
  }
  return false;
}
/*--------------------------------------------------------------------------*/
/**
 * Builds one UTF-8 continuation byte (bit pattern 10xxxxxx) from six bits of
 * a code point.
 *
 * @param {number} codePoint The code point being encoded.
 * @param {number} shift How far to shift right so the wanted six bits land
 *   in the low positions.
 * @returns {string} A one-character string whose char code is the byte.
 */
function createByte(codePoint, shift) {
  var sixBits = (codePoint >> shift) & 0x3F;
  return stringFromCharCode(sixBits | 0x80);
}
/**
 * Encodes one code point as a UTF-8 byte string, one character per byte.
 *
 * @param {number} codePoint The code point to encode.
 * @param {boolean} strict Passed to checkScalarValue for code points in the
 *   3-byte range. When falsy, a surrogate is encoded as U+FFFD instead.
 * @returns {string} The bytes of the sequence, each as a character with a
 *   char code in 0x00..0xFF.
 * @throws {Error} Whatever checkScalarValue throws for a surrogate when
 *   `strict` is truthy.
 */
function encodeCodePoint(codePoint, strict) {
  if ((codePoint & 0xFFFFFF80) === 0) { // 1-byte sequence
    return stringFromCharCode(codePoint);
  }
  var symbol = '';
  if ((codePoint & 0xFFFFF800) === 0) { // 2-byte sequence
    symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0);
  } else if ((codePoint & 0xFFFF0000) === 0) { // 3-byte sequence
    // Surrogates fall in this range; replace them with U+FFFD unless the
    // check throws.
    if (!checkScalarValue(codePoint, strict)) {
      codePoint = 0xFFFD;
    }
    symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0);
    symbol += createByte(codePoint, 6);
  } else if ((codePoint & 0xFFE00000) === 0) { // 4-byte sequence
    symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0);
    symbol += createByte(codePoint, 12);
    symbol += createByte(codePoint, 6);
  }
  // NOTE: a value with bits set above 0x1FFFFF matches no branch above, so
  // only this trailing byte is produced for it.
  symbol += stringFromCharCode((codePoint & 0x3F) | 0x80);
  return symbol;
}
/**
 * Encodes a JavaScript string as a UTF-8 byte string, one character per
 * byte.
 *
 * @param {string} string The text to encode.
 * @param {Object} [opts] Options. Strict handling is on unless
 *   `opts.strict` is exactly `false`.
 * @returns {string} The concatenated result of encodeCodePoint for every
 *   code point of the input.
 * @throws {Error} Whatever encodeCodePoint throws.
 */
function utf8encode(string, opts) {
  var options = opts || {};
  var strict = false !== options.strict;
  var codePoints = ucs2decode(string);
  var length = codePoints.length;
  var byteString = '';
  for (var index = 0; index < length; index++) {
    byteString += encodeCodePoint(codePoints[index], strict);
  }
  return byteString;
}
/*--------------------------------------------------------------------------*/
/**
 * Reads the next element of the shared `byteArray` as a UTF-8 continuation
 * byte and advances `byteIndex`.
 *
 * The element is masked to its low 8 bits before it is checked. The index is
 * advanced even when the byte turns out not to be a continuation byte.
 *
 * @returns {number} The six payload bits of the continuation byte.
 * @throws {Error} 'Invalid byte index' when no input is left, or
 *   'Invalid continuation byte' when the byte is not of the form 10xxxxxx.
 */
function readContinuationByte() {
  if (byteIndex >= byteCount) {
    throw new Error('Invalid byte index');
  }
  var continuationByte = byteArray[byteIndex] & 0xFF;
  byteIndex++;
  if ((continuationByte & 0xC0) !== 0x80) {
    // If we end up here, it’s not a continuation byte
    throw new Error('Invalid continuation byte');
  }
  return continuationByte & 0x3F;
}
/**
 * Decodes the next UTF-8 sequence from the shared `byteArray`, starting at
 * `byteIndex`, and advances `byteIndex` past the bytes it consumed.
 *
 * @param {boolean} strict Passed to checkScalarValue for 3-byte sequences.
 *   When falsy, a decoded surrogate is returned as U+FFFD instead.
 * @returns {number|false} The decoded code point, or `false` once
 *   `byteIndex` has reached `byteCount`.
 * @throws {Error} 'Invalid byte index' when `byteIndex` is past the end,
 *   'Invalid continuation byte' for a bad continuation byte or an overlong
 *   2- or 3-byte form, 'Invalid UTF-8 detected' for any other malformed
 *   sequence, plus whatever checkScalarValue throws.
 */
function decodeSymbol(strict) {
  var byte1;
  var byte2;
  var byte3;
  var codePoint;

  if (byteIndex > byteCount) {
    throw new Error('Invalid byte index');
  }
  if (byteIndex === byteCount) {
    return false;
  }

  // Read first byte
  byte1 = byteArray[byteIndex] & 0xFF;
  byteIndex++;

  // 1-byte sequence (no continuation bytes)
  if ((byte1 & 0x80) === 0) {
    return byte1;
  }

  // 2-byte sequence
  if ((byte1 & 0xE0) === 0xC0) {
    byte2 = readContinuationByte();
    codePoint = ((byte1 & 0x1F) << 6) | byte2;
    // Values below 0x80 fit in one byte, so this would be an overlong form.
    if (codePoint < 0x80) {
      throw new Error('Invalid continuation byte');
    }
    return codePoint;
  }

  // 3-byte sequence (may include unpaired surrogates)
  if ((byte1 & 0xF0) === 0xE0) {
    byte2 = readContinuationByte();
    byte3 = readContinuationByte();
    codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
    // Values below 0x0800 fit in two bytes, so this would be an overlong form.
    if (codePoint < 0x0800) {
      throw new Error('Invalid continuation byte');
    }
    return checkScalarValue(codePoint, strict) ? codePoint : 0xFFFD;
  }

  // 4-byte sequence
  if ((byte1 & 0xF8) === 0xF0) {
    byte2 = readContinuationByte();
    byte3 = readContinuationByte();
    codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) |
      (byte3 << 0x06) | readContinuationByte();
    if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
      return codePoint;
    }
    // Out-of-range results fall through to the generic error below.
  }

  throw new Error('Invalid UTF-8 detected');
}
// Decoder state shared by utf8decode, decodeSymbol and readContinuationByte.
// utf8decode resets all three at the start of every call.
var byteArray; // code points of the byte string being decoded
var byteCount; // length of byteArray
var byteIndex; // position of the next element to read
/**
 * Decodes a UTF-8 byte string into a JavaScript string.
 *
 * The input is split with ucs2decode and stored in the module-level decoder
 * state (`byteArray`, `byteCount`, `byteIndex`). decodeSymbol is then called
 * until it returns `false`, and the collected code points are joined with
 * ucs2encode.
 *
 * @param {string} byteString The encoded input.
 * @param {Object} [opts] Options. Strict handling is on unless
 *   `opts.strict` is exactly `false`.
 * @returns {string} The decoded text.
 * @throws {Error} Whatever decodeSymbol throws for malformed input.
 */
function utf8decode(byteString, opts) {
  var options = opts || {};
  var strict = false !== options.strict;

  // Reset the shared decoder state for this call.
  byteArray = ucs2decode(byteString);
  byteCount = byteArray.length;
  byteIndex = 0;

  var codePoints = [];
  var symbol = decodeSymbol(strict);
  while (symbol !== false) {
    codePoints.push(symbol);
    symbol = decodeSymbol(strict);
  }
  return ucs2encode(codePoints);
}
- module.exports = {
- version: '2.1.2',
- encode: utf8encode,
- decode: utf8decode
- };
|