TrieBuilder.js 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.serializeBase64 = exports.TrieBuilder = exports.BITS_32 = exports.BITS_16 = void 0;
  4. var Trie_1 = require("./Trie");
  5. var base64_arraybuffer_1 = require("base64-arraybuffer");
  6. /**
  7. * Trie2 constants, defining shift widths, index array lengths, etc.
  8. *
  9. * These are needed for the runtime macros but users can treat these as
  10. * implementation details and skip to the actual public API further below.
  11. */
  12. // const UTRIE2_OPTIONS_VALUE_BITS_MASK = 0x000f;
  13. /** Number of code points per index-1 table entry. 2048=0x800 */
  14. var UTRIE2_CP_PER_INDEX_1_ENTRY = 1 << Trie_1.UTRIE2_SHIFT_1;
  15. /** The alignment size of a data block. Also the granularity for compaction. */
  16. var UTRIE2_DATA_GRANULARITY = 1 << Trie_1.UTRIE2_INDEX_SHIFT;
  17. /* Fixed layout of the first part of the index array. ------------------- */
  18. /**
  19. * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
  20. * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
  21. */
  22. var UTRIE2_INDEX_2_OFFSET = 0;
  23. var UTRIE2_MAX_INDEX_1_LENGTH = 0x100000 >> Trie_1.UTRIE2_SHIFT_1;
  24. /*
  25. * Fixed layout of the first part of the data array. -----------------------
  26. * Starts with 4 blocks (128=0x80 entries) for ASCII.
  27. */
  28. /**
  29. * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
  30. * Used with linear access for single bytes 0..0xbf for simple error handling.
  31. * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
  32. */
  33. var UTRIE2_BAD_UTF8_DATA_OFFSET = 0x80;
  34. /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
  35. var UTRIE2_DATA_START_OFFSET = 0xc0;
  36. /* Building a Trie2 ---------------------------------------------------------- */
  37. /*
  38. * These definitions are mostly needed by utrie2_builder.c, but also by
  39. * utrie2_get32() and utrie2_enum().
  40. */
  41. /*
  42. * At build time, leave a gap in the index-2 table,
  43. * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
  44. * and the supplementary index-1 table.
  45. * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
  46. */
  47. var UNEWTRIE2_INDEX_GAP_OFFSET = Trie_1.UTRIE2_INDEX_2_BMP_LENGTH;
  48. var UNEWTRIE2_INDEX_GAP_LENGTH = (Trie_1.UTRIE2_UTF8_2B_INDEX_2_LENGTH + UTRIE2_MAX_INDEX_1_LENGTH + Trie_1.UTRIE2_INDEX_2_MASK) & ~Trie_1.UTRIE2_INDEX_2_MASK;
  49. /**
  50. * Maximum length of the build-time index-2 array.
  51. * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
  52. * plus the part of the index-2 table for lead surrogate code points,
  53. * plus the build-time index gap,
  54. * plus the null index-2 block.
  55. */
  56. var UNEWTRIE2_MAX_INDEX_2_LENGTH = (0x110000 >> Trie_1.UTRIE2_SHIFT_2) +
  57. Trie_1.UTRIE2_LSCP_INDEX_2_LENGTH +
  58. UNEWTRIE2_INDEX_GAP_LENGTH +
  59. Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  60. var UNEWTRIE2_INDEX_1_LENGTH = 0x110000 >> Trie_1.UTRIE2_SHIFT_1;
  61. /**
  62. * Maximum length of the build-time data array.
  63. * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block,
  64. * plus values for the 0x400 surrogate code units.
  65. */
  66. var UNEWTRIE2_MAX_DATA_LENGTH = 0x110000 + 0x40 + 0x40 + 0x400;
  67. /* Start with allocation of 16k data entries. */
  68. var UNEWTRIE2_INITIAL_DATA_LENGTH = 1 << 14;
  69. /* Grow about 8x each time. */
  70. var UNEWTRIE2_MEDIUM_DATA_LENGTH = 1 << 17;
  71. /** The null index-2 block, following the gap in the index-2 table. */
  72. var UNEWTRIE2_INDEX_2_NULL_OFFSET = UNEWTRIE2_INDEX_GAP_OFFSET + UNEWTRIE2_INDEX_GAP_LENGTH;
  73. /** The start of allocated index-2 blocks. */
  74. var UNEWTRIE2_INDEX_2_START_OFFSET = UNEWTRIE2_INDEX_2_NULL_OFFSET + Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  75. /**
  76. * The null data block.
  77. * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
  78. * to work with 6-bit trail bytes from 2-byte UTF-8.
  79. */
  80. var UNEWTRIE2_DATA_NULL_OFFSET = UTRIE2_DATA_START_OFFSET;
  81. /** The start of allocated data blocks. */
  82. var UNEWTRIE2_DATA_START_OFFSET = UNEWTRIE2_DATA_NULL_OFFSET + 0x40;
  83. /**
  84. * The start of data blocks for U+0800 and above.
  85. * Below, compaction uses a block length of 64 for 2-byte UTF-8.
  86. * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
  87. * Data values for 0x780 code points beyond ASCII.
  88. */
  89. var UNEWTRIE2_DATA_0800_OFFSET = UNEWTRIE2_DATA_START_OFFSET + 0x780;
  90. /**
  91. * Maximum length of the runtime index array.
  92. * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
  93. * (The actual maximum length is lower,
  94. * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
  95. */
  96. var UTRIE2_MAX_INDEX_LENGTH = 0xffff;
  97. /**
  98. * Maximum length of the runtime data array.
  99. * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
  100. * and by uint16_t UTrie2Header.shiftedDataLength.
  101. */
  102. var UTRIE2_MAX_DATA_LENGTH = 0xffff << Trie_1.UTRIE2_INDEX_SHIFT;
  103. exports.BITS_16 = 16;
  104. exports.BITS_32 = 32;
  105. var isHighSurrogate = function (c) { return c >= 0xd800 && c <= 0xdbff; };
  106. var equalInt = function (a, s, t, length) {
  107. for (var i = 0; i < length; i++) {
  108. if (a[s + i] !== a[t + i]) {
  109. return false;
  110. }
  111. }
  112. return true;
  113. };
  114. var TrieBuilder = /** @class */ (function () {
  115. function TrieBuilder(initialValue, errorValue) {
  116. if (initialValue === void 0) { initialValue = 0; }
  117. if (errorValue === void 0) { errorValue = 0; }
  118. this.initialValue = initialValue;
  119. this.errorValue = errorValue;
  120. this.highStart = 0x110000;
  121. this.data = new Uint32Array(UNEWTRIE2_INITIAL_DATA_LENGTH);
  122. this.dataCapacity = UNEWTRIE2_INITIAL_DATA_LENGTH;
  123. this.highStart = 0x110000;
  124. this.firstFreeBlock = 0; /* no free block in the list */
  125. this.isCompacted = false;
  126. this.index1 = new Uint32Array(UNEWTRIE2_INDEX_1_LENGTH);
  127. this.index2 = new Uint32Array(UNEWTRIE2_MAX_INDEX_2_LENGTH);
  128. /*
  129. * Multi-purpose per-data-block table.
  130. *
  131. * Before compacting:
  132. *
  133. * Per-data-block reference counters/free-block list.
  134. * 0: unused
  135. * >0: reference counter (number of index-2 entries pointing here)
  136. * <0: next free data block in free-block list
  137. *
  138. * While compacting:
  139. *
  140. * Map of adjusted indexes, used in compactData() and compactIndex2().
  141. * Maps from original indexes to new ones.
  142. */
  143. this.map = new Uint32Array(UNEWTRIE2_MAX_DATA_LENGTH >> Trie_1.UTRIE2_SHIFT_2);
  144. /*
  145. * preallocate and reset
  146. * - ASCII
  147. * - the bad-UTF-8-data block
  148. * - the null data block
  149. */
  150. var i, j;
  151. for (i = 0; i < 0x80; ++i) {
  152. this.data[i] = initialValue;
  153. }
  154. for (; i < 0xc0; ++i) {
  155. this.data[i] = errorValue;
  156. }
  157. for (i = UNEWTRIE2_DATA_NULL_OFFSET; i < UNEWTRIE2_DATA_START_OFFSET; ++i) {
  158. this.data[i] = initialValue;
  159. }
  160. this.dataNullOffset = UNEWTRIE2_DATA_NULL_OFFSET;
  161. this.dataLength = UNEWTRIE2_DATA_START_OFFSET;
  162. /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
  163. for (i = 0, j = 0; j < 0x80; ++i, j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  164. this.index2[i] = j;
  165. this.map[i] = 1;
  166. }
  167. /* reference counts for the bad-UTF-8-data block */
  168. for (; j < 0xc0; ++i, j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  169. this.map[i] = 0;
  170. }
  171. /*
  172. * Reference counts for the null data block: all blocks except for the ASCII blocks.
  173. * Plus 1 so that we don't drop this block during compaction.
  174. * Plus as many as needed for lead surrogate code points.
  175. */
  176. /* i==newTrie->dataNullOffset */
  177. this.map[i++] = (0x110000 >> Trie_1.UTRIE2_SHIFT_2) - (0x80 >> Trie_1.UTRIE2_SHIFT_2) + 1 + Trie_1.UTRIE2_LSCP_INDEX_2_LENGTH;
  178. j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  179. for (; j < UNEWTRIE2_DATA_START_OFFSET; ++i, j += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  180. this.map[i] = 0;
  181. }
  182. /*
  183. * set the remaining indexes in the BMP index-2 block
  184. * to the null data block
  185. */
  186. for (i = 0x80 >> Trie_1.UTRIE2_SHIFT_2; i < Trie_1.UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
  187. this.index2[i] = UNEWTRIE2_DATA_NULL_OFFSET;
  188. }
  189. /*
  190. * Fill the index gap with impossible values so that compaction
  191. * does not overlap other index-2 blocks with the gap.
  192. */
  193. for (i = 0; i < UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
  194. this.index2[UNEWTRIE2_INDEX_GAP_OFFSET + i] = -1;
  195. }
  196. /* set the indexes in the null index-2 block */
  197. for (i = 0; i < Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
  198. this.index2[UNEWTRIE2_INDEX_2_NULL_OFFSET + i] = UNEWTRIE2_DATA_NULL_OFFSET;
  199. }
  200. this.index2NullOffset = UNEWTRIE2_INDEX_2_NULL_OFFSET;
  201. this.index2Length = UNEWTRIE2_INDEX_2_START_OFFSET;
  202. /* set the index-1 indexes for the linear index-2 block */
  203. for (i = 0, j = 0; i < Trie_1.UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; ++i, j += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH) {
  204. this.index1[i] = j;
  205. }
  206. /* set the remaining index-1 indexes to the null index-2 block */
  207. for (; i < UNEWTRIE2_INDEX_1_LENGTH; ++i) {
  208. this.index1[i] = UNEWTRIE2_INDEX_2_NULL_OFFSET;
  209. }
  210. /*
  211. * Preallocate and reset data for U+0080..U+07ff,
  212. * for 2-byte UTF-8 which will be compacted in 64-blocks
  213. * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
  214. */
  215. for (i = 0x80; i < 0x800; i += Trie_1.UTRIE2_DATA_BLOCK_LENGTH) {
  216. this.set(i, initialValue);
  217. }
  218. }
  219. /**
  220. * Set a value for a code point.
  221. *
  222. * @param c the code point
  223. * @param value the value
  224. */
  225. TrieBuilder.prototype.set = function (c, value) {
  226. if (c < 0 || c > 0x10ffff) {
  227. throw new Error('Invalid code point.');
  228. }
  229. this._set(c, true, value);
  230. return this;
  231. };
  232. /**
  233. * Set a value in a range of code points [start..end].
  234. * All code points c with start<=c<=end will get the value if
  235. * overwrite is TRUE or if the old value is the initial value.
  236. *
  237. * @param start the first code point to get the value
  238. * @param end the last code point to get the value (inclusive)
  239. * @param value the value
  240. * @param overwrite flag for whether old non-initial values are to be overwritten
  241. */
  242. TrieBuilder.prototype.setRange = function (start, end, value, overwrite) {
  243. if (overwrite === void 0) { overwrite = false; }
  244. /*
  245. * repeat value in [start..end]
  246. * mark index values for repeat-data blocks by setting bit 31 of the index values
  247. * fill around existing values if any, if(overwrite)
  248. */
  249. var block, rest, repeatBlock;
  250. if (start > 0x10ffff || start < 0 || end > 0x10ffff || end < 0 || start > end) {
  251. throw new Error('Invalid code point range.');
  252. }
  253. if (!overwrite && value === this.initialValue) {
  254. return this; /* nothing to do */
  255. }
  256. if (this.isCompacted) {
  257. throw new Error('Trie was already compacted');
  258. }
  259. var limit = end + 1;
  260. if ((start & Trie_1.UTRIE2_DATA_MASK) !== 0) {
  261. /* set partial block at [start..following block boundary[ */
  262. block = this.getDataBlock(start, true);
  263. var nextStart = (start + Trie_1.UTRIE2_DATA_BLOCK_LENGTH) & ~Trie_1.UTRIE2_DATA_MASK;
  264. if (nextStart <= limit) {
  265. this.fillBlock(block, start & Trie_1.UTRIE2_DATA_MASK, Trie_1.UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
  266. start = nextStart;
  267. }
  268. else {
  269. this.fillBlock(block, start & Trie_1.UTRIE2_DATA_MASK, limit & Trie_1.UTRIE2_DATA_MASK, value, this.initialValue, overwrite);
  270. return this;
  271. }
  272. }
  273. /* number of positions in the last, partial block */
  274. rest = limit & Trie_1.UTRIE2_DATA_MASK;
  275. /* round down limit to a block boundary */
  276. limit &= ~Trie_1.UTRIE2_DATA_MASK;
  277. /* iterate over all-value blocks */
  278. repeatBlock = value === this.initialValue ? this.dataNullOffset : -1;
  279. while (start < limit) {
  280. var i2 = void 0;
  281. var setRepeatBlock = false;
  282. if (value === this.initialValue && this.isInNullBlock(start, true)) {
  283. start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
  284. continue;
  285. }
  286. /* get index value */
  287. i2 = this.getIndex2Block(start, true);
  288. i2 += (start >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK;
  289. block = this.index2[i2];
  290. if (this.isWritableBlock(block)) {
  291. /* already allocated */
  292. if (overwrite && block >= UNEWTRIE2_DATA_0800_OFFSET) {
  293. /*
  294. * We overwrite all values, and it's not a
  295. * protected (ASCII-linear or 2-byte UTF-8) block:
  296. * replace with the repeatBlock.
  297. */
  298. setRepeatBlock = true;
  299. }
  300. else {
  301. /* !overwrite, or protected block: just write the values into this block */
  302. this.fillBlock(block, 0, Trie_1.UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
  303. }
  304. }
  305. else if (this.data[block] !== value && (overwrite || block === this.dataNullOffset)) {
  306. /*
  307. * Set the repeatBlock instead of the null block or previous repeat block:
  308. *
  309. * If !isWritableBlock() then all entries in the block have the same value
  310. * because it's the null block or a range block (the repeatBlock from a previous
  311. * call to utrie2_setRange32()).
  312. * No other blocks are used multiple times before compacting.
  313. *
  314. * The null block is the only non-writable block with the initialValue because
  315. * of the repeatBlock initialization above. (If value==initialValue, then
  316. * the repeatBlock will be the null data block.)
  317. *
  318. * We set our repeatBlock if the desired value differs from the block's value,
  319. * and if we overwrite any data or if the data is all initial values
  320. * (which is the same as the block being the null block, see above).
  321. */
  322. setRepeatBlock = true;
  323. }
  324. if (setRepeatBlock) {
  325. if (repeatBlock >= 0) {
  326. this.setIndex2Entry(i2, repeatBlock);
  327. }
  328. else {
  329. /* create and set and fill the repeatBlock */
  330. repeatBlock = this.getDataBlock(start, true);
  331. this.writeBlock(repeatBlock, value);
  332. }
  333. }
  334. start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  335. }
  336. if (rest > 0) {
  337. /* set partial block at [last block boundary..limit[ */
  338. block = this.getDataBlock(start, true);
  339. this.fillBlock(block, 0, rest, value, this.initialValue, overwrite);
  340. }
  341. return this;
  342. };
  343. /**
  344. * Get the value for a code point as stored in the Trie2.
  345. *
  346. * @param codePoint the code point
  347. * @return the value
  348. */
  349. TrieBuilder.prototype.get = function (codePoint) {
  350. if (codePoint < 0 || codePoint > 0x10ffff) {
  351. return this.errorValue;
  352. }
  353. else {
  354. return this._get(codePoint, true);
  355. }
  356. };
  357. TrieBuilder.prototype._get = function (c, fromLSCP) {
  358. var i2;
  359. if (c >= this.highStart && (!(c >= 0xd800 && c < 0xdc00) || fromLSCP)) {
  360. return this.data[this.dataLength - UTRIE2_DATA_GRANULARITY];
  361. }
  362. if (c >= 0xd800 && c < 0xdc00 && fromLSCP) {
  363. i2 = Trie_1.UTRIE2_LSCP_INDEX_2_OFFSET - (0xd800 >> Trie_1.UTRIE2_SHIFT_2) + (c >> Trie_1.UTRIE2_SHIFT_2);
  364. }
  365. else {
  366. i2 = this.index1[c >> Trie_1.UTRIE2_SHIFT_1] + ((c >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK);
  367. }
  368. var block = this.index2[i2];
  369. return this.data[block + (c & Trie_1.UTRIE2_DATA_MASK)];
  370. };
  371. TrieBuilder.prototype.freeze = function (valueBits) {
  372. if (valueBits === void 0) { valueBits = exports.BITS_32; }
  373. var i;
  374. var allIndexesLength;
  375. var dataMove; /* >0 if the data is moved to the end of the index array */
  376. /* compact if necessary */
  377. if (!this.isCompacted) {
  378. this.compactTrie();
  379. }
  380. allIndexesLength = this.highStart <= 0x10000 ? Trie_1.UTRIE2_INDEX_1_OFFSET : this.index2Length;
  381. if (valueBits === exports.BITS_16) {
  382. // dataMove = allIndexesLength;
  383. dataMove = 0;
  384. }
  385. else {
  386. dataMove = 0;
  387. }
  388. /* are indexLength and dataLength within limits? */
  389. if (
  390. /* for unshifted indexLength */
  391. allIndexesLength > UTRIE2_MAX_INDEX_LENGTH ||
  392. /* for unshifted dataNullOffset */
  393. dataMove + this.dataNullOffset > 0xffff ||
  394. /* for unshifted 2-byte UTF-8 index-2 values */
  395. dataMove + UNEWTRIE2_DATA_0800_OFFSET > 0xffff ||
  396. /* for shiftedDataLength */
  397. dataMove + this.dataLength > UTRIE2_MAX_DATA_LENGTH) {
  398. throw new Error('Trie data is too large.');
  399. }
  400. var index = new Uint16Array(allIndexesLength);
  401. /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
  402. var destIdx = 0;
  403. for (i = 0; i < Trie_1.UTRIE2_INDEX_2_BMP_LENGTH; i++) {
  404. index[destIdx++] = (this.index2[i] + dataMove) >> Trie_1.UTRIE2_INDEX_SHIFT;
  405. }
  406. /* write UTF-8 2-byte index-2 values, not right-shifted */
  407. for (i = 0; i < 0xc2 - 0xc0; ++i) {
  408. /* C0..C1 */
  409. index[destIdx++] = dataMove + UTRIE2_BAD_UTF8_DATA_OFFSET;
  410. }
  411. for (; i < 0xe0 - 0xc0; ++i) {
  412. /* C2..DF */
  413. index[destIdx++] = dataMove + this.index2[i << (6 - Trie_1.UTRIE2_SHIFT_2)];
  414. }
  415. if (this.highStart > 0x10000) {
  416. var index1Length = (this.highStart - 0x10000) >> Trie_1.UTRIE2_SHIFT_1;
  417. var index2Offset = Trie_1.UTRIE2_INDEX_2_BMP_LENGTH + Trie_1.UTRIE2_UTF8_2B_INDEX_2_LENGTH + index1Length;
  418. /* write 16-bit index-1 values for supplementary code points */
  419. for (i = 0; i < index1Length; i++) {
  420. index[destIdx++] = UTRIE2_INDEX_2_OFFSET + this.index1[i + Trie_1.UTRIE2_OMITTED_BMP_INDEX_1_LENGTH];
  421. }
  422. /*
  423. * write the index-2 array values for supplementary code points,
  424. * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
  425. */
  426. for (i = 0; i < this.index2Length - index2Offset; i++) {
  427. index[destIdx++] = (dataMove + this.index2[index2Offset + i]) >> Trie_1.UTRIE2_INDEX_SHIFT;
  428. }
  429. }
  430. /* write the 16/32-bit data array */
  431. switch (valueBits) {
  432. case exports.BITS_16:
  433. /* write 16-bit data values */
  434. var data16 = new Uint16Array(this.dataLength);
  435. for (i = 0; i < this.dataLength; i++) {
  436. data16[i] = this.data[i];
  437. }
  438. return new Trie_1.Trie(this.initialValue, this.errorValue, this.highStart, dataMove + this.dataLength - UTRIE2_DATA_GRANULARITY, index, data16);
  439. case exports.BITS_32:
  440. /* write 32-bit data values */
  441. var data32 = new Uint32Array(this.dataLength);
  442. for (i = 0; i < this.dataLength; i++) {
  443. data32[i] = this.data[i];
  444. }
  445. return new Trie_1.Trie(this.initialValue, this.errorValue, this.highStart, dataMove + this.dataLength - UTRIE2_DATA_GRANULARITY, index, data32);
  446. default:
  447. throw new Error('Bits should be either 16 or 32');
  448. }
  449. };
  450. /*
  451. * Find the start of the last range in the trie by enumerating backward.
  452. * Indexes for supplementary code points higher than this will be omitted.
  453. */
  454. TrieBuilder.prototype.findHighStart = function (highValue) {
  455. var value;
  456. var i2, j, i2Block, prevI2Block, block, prevBlock;
  457. /* set variables for previous range */
  458. if (highValue === this.initialValue) {
  459. prevI2Block = this.index2NullOffset;
  460. prevBlock = this.dataNullOffset;
  461. }
  462. else {
  463. prevI2Block = -1;
  464. prevBlock = -1;
  465. }
  466. var prev = 0x110000;
  467. /* enumerate index-2 blocks */
  468. var i1 = UNEWTRIE2_INDEX_1_LENGTH;
  469. var c = prev;
  470. while (c > 0) {
  471. i2Block = this.index1[--i1];
  472. if (i2Block === prevI2Block) {
  473. /* the index-2 block is the same as the previous one, and filled with highValue */
  474. c -= UTRIE2_CP_PER_INDEX_1_ENTRY;
  475. continue;
  476. }
  477. prevI2Block = i2Block;
  478. if (i2Block === this.index2NullOffset) {
  479. /* this is the null index-2 block */
  480. if (highValue !== this.initialValue) {
  481. return c;
  482. }
  483. c -= UTRIE2_CP_PER_INDEX_1_ENTRY;
  484. }
  485. else {
  486. /* enumerate data blocks for one index-2 block */
  487. for (i2 = Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH; i2 > 0;) {
  488. block = this.index2[i2Block + --i2];
  489. if (block === prevBlock) {
  490. /* the block is the same as the previous one, and filled with highValue */
  491. c -= Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  492. continue;
  493. }
  494. prevBlock = block;
  495. if (block === this.dataNullOffset) {
  496. /* this is the null data block */
  497. if (highValue !== this.initialValue) {
  498. return c;
  499. }
  500. c -= Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  501. }
  502. else {
  503. for (j = Trie_1.UTRIE2_DATA_BLOCK_LENGTH; j > 0;) {
  504. value = this.data[block + --j];
  505. if (value !== highValue) {
  506. return c;
  507. }
  508. --c;
  509. }
  510. }
  511. }
  512. }
  513. }
  514. /* deliver last range */
  515. return 0;
  516. };
  517. /*
  518. * Compact a build-time trie.
  519. *
  520. * The compaction
  521. * - removes blocks that are identical with earlier ones
  522. * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
  523. * - moves blocks in steps of the data granularity
  524. * - moves and overlaps blocks that overlap with multiple values in the overlap region
  525. *
  526. * It does not
  527. * - try to move and overlap blocks that are not already adjacent
  528. */
  529. TrieBuilder.prototype.compactData = function () {
  530. var start, movedStart;
  531. var blockLength, overlap;
  532. var i, mapIndex, blockCount;
  533. /* do not compact linear-ASCII data */
  534. var newStart = UTRIE2_DATA_START_OFFSET;
  535. for (start = 0, i = 0; start < newStart; start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH, ++i) {
  536. this.map[i] = start;
  537. }
  538. /*
  539. * Start with a block length of 64 for 2-byte UTF-8,
  540. * then switch to UTRIE2_DATA_BLOCK_LENGTH.
  541. */
  542. blockLength = 64;
  543. blockCount = blockLength >> Trie_1.UTRIE2_SHIFT_2;
  544. for (start = newStart; start < this.dataLength;) {
  545. /*
  546. * start: index of first entry of current block
  547. * newStart: index where the current block is to be moved
  548. * (right after current end of already-compacted data)
  549. */
  550. if (start === UNEWTRIE2_DATA_0800_OFFSET) {
  551. blockLength = Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  552. blockCount = 1;
  553. }
  554. /* skip blocks that are not used */
  555. if (this.map[start >> Trie_1.UTRIE2_SHIFT_2] <= 0) {
  556. /* advance start to the next block */
  557. start += blockLength;
  558. /* leave newStart with the previous block! */
  559. continue;
  560. }
  561. /* search for an identical block */
  562. movedStart = this.findSameDataBlock(newStart, start, blockLength);
  563. if (movedStart >= 0) {
  564. /* found an identical block, set the other block's index value for the current block */
  565. for (i = blockCount, mapIndex = start >> Trie_1.UTRIE2_SHIFT_2; i > 0; --i) {
  566. this.map[mapIndex++] = movedStart;
  567. movedStart += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  568. }
  569. /* advance start to the next block */
  570. start += blockLength;
  571. /* leave newStart with the previous block! */
  572. continue;
  573. }
  574. /* see if the beginning of this block can be overlapped with the end of the previous block */
  575. /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
  576. for (overlap = blockLength - UTRIE2_DATA_GRANULARITY; overlap > 0 && !equalInt(this.data, newStart - overlap, start, overlap); overlap -= UTRIE2_DATA_GRANULARITY) { }
  577. if (overlap > 0 || newStart < start) {
  578. /* some overlap, or just move the whole block */
  579. movedStart = newStart - overlap;
  580. for (i = blockCount, mapIndex = start >> Trie_1.UTRIE2_SHIFT_2; i > 0; --i) {
  581. this.map[mapIndex++] = movedStart;
  582. movedStart += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  583. }
  584. /* move the non-overlapping indexes to their new positions */
  585. start += overlap;
  586. for (i = blockLength - overlap; i > 0; --i) {
  587. this.data[newStart++] = this.data[start++];
  588. }
  589. }
  590. else {
  591. /* no overlap && newStart==start */
  592. for (i = blockCount, mapIndex = start >> Trie_1.UTRIE2_SHIFT_2; i > 0; --i) {
  593. this.map[mapIndex++] = start;
  594. start += Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  595. }
  596. newStart = start;
  597. }
  598. }
  599. /* now adjust the index-2 table */
  600. for (i = 0; i < this.index2Length; ++i) {
  601. if (i === UNEWTRIE2_INDEX_GAP_OFFSET) {
  602. /* Gap indexes are invalid (-1). Skip over the gap. */
  603. i += UNEWTRIE2_INDEX_GAP_LENGTH;
  604. }
  605. this.index2[i] = this.map[this.index2[i] >> Trie_1.UTRIE2_SHIFT_2];
  606. }
  607. this.dataNullOffset = this.map[this.dataNullOffset >> Trie_1.UTRIE2_SHIFT_2];
  608. /* ensure dataLength alignment */
  609. while ((newStart & (UTRIE2_DATA_GRANULARITY - 1)) !== 0) {
  610. this.data[newStart++] = this.initialValue;
  611. }
  612. this.dataLength = newStart;
  613. };
  614. TrieBuilder.prototype.findSameDataBlock = function (dataLength, otherBlock, blockLength) {
  615. var block = 0;
  616. /* ensure that we do not even partially get past dataLength */
  617. dataLength -= blockLength;
  618. for (; block <= dataLength; block += UTRIE2_DATA_GRANULARITY) {
  619. if (equalInt(this.data, block, otherBlock, blockLength)) {
  620. return block;
  621. }
  622. }
  623. return -1;
  624. };
  625. TrieBuilder.prototype.compactTrie = function () {
  626. var highValue = this.get(0x10ffff);
  627. /* find highStart and round it up */
  628. var localHighStart = this.findHighStart(highValue);
  629. localHighStart = (localHighStart + (UTRIE2_CP_PER_INDEX_1_ENTRY - 1)) & ~(UTRIE2_CP_PER_INDEX_1_ENTRY - 1);
  630. if (localHighStart === 0x110000) {
  631. highValue = this.errorValue;
  632. }
  633. /*
  634. * Set trie->highStart only after utrie2_get32(trie, highStart).
  635. * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
  636. */
  637. this.highStart = localHighStart;
  638. if (this.highStart < 0x110000) {
  639. /* Blank out [highStart..10ffff] to release associated data blocks. */
  640. var suppHighStart = this.highStart <= 0x10000 ? 0x10000 : this.highStart;
  641. this.setRange(suppHighStart, 0x10ffff, this.initialValue, true);
  642. }
  643. this.compactData();
  644. if (this.highStart > 0x10000) {
  645. this.compactIndex2();
  646. }
  647. /*
  648. * Store the highValue in the data array and round up the dataLength.
  649. * Must be done after compactData() because that assumes that dataLength
  650. * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
  651. */
  652. this.data[this.dataLength++] = highValue;
  653. while ((this.dataLength & (UTRIE2_DATA_GRANULARITY - 1)) !== 0) {
  654. this.data[this.dataLength++] = this.initialValue;
  655. }
  656. this.isCompacted = true;
  657. };
  658. TrieBuilder.prototype.compactIndex2 = function () {
  659. var i, start, movedStart, overlap;
  660. /* do not compact linear-BMP index-2 blocks */
  661. var newStart = Trie_1.UTRIE2_INDEX_2_BMP_LENGTH;
  662. for (start = 0, i = 0; start < newStart; start += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
  663. this.map[i] = start;
  664. }
  665. /* Reduce the index table gap to what will be needed at runtime. */
  666. newStart += Trie_1.UTRIE2_UTF8_2B_INDEX_2_LENGTH + ((this.highStart - 0x10000) >> Trie_1.UTRIE2_SHIFT_1);
  667. for (start = UNEWTRIE2_INDEX_2_NULL_OFFSET; start < this.index2Length;) {
  668. /*
  669. * start: index of first entry of current block
  670. * newStart: index where the current block is to be moved
  671. * (right after current end of already-compacted data)
  672. */
  673. /* search for an identical block */
  674. if ((movedStart = this.findSameIndex2Block(newStart, start)) >= 0) {
  675. /* found an identical block, set the other block's index value for the current block */
  676. this.map[start >> Trie_1.UTRIE2_SHIFT_1_2] = movedStart;
  677. /* advance start to the next block */
  678. start += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  679. /* leave newStart with the previous block! */
  680. continue;
  681. }
  682. /* see if the beginning of this block can be overlapped with the end of the previous block */
  683. /* look for maximum overlap with the previous, adjacent block */
  684. for (overlap = Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH - 1; overlap > 0 && !equalInt(this.index2, newStart - overlap, start, overlap); --overlap) { }
  685. if (overlap > 0 || newStart < start) {
  686. /* some overlap, or just move the whole block */
  687. this.map[start >> Trie_1.UTRIE2_SHIFT_1_2] = newStart - overlap;
  688. /* move the non-overlapping indexes to their new positions */
  689. start += overlap;
  690. for (i = Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH - overlap; i > 0; --i) {
  691. this.index2[newStart++] = this.index2[start++];
  692. }
  693. }
  694. else {
  695. /* no overlap && newStart==start */ this.map[start >> Trie_1.UTRIE2_SHIFT_1_2] = start;
  696. start += Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  697. newStart = start;
  698. }
  699. }
  700. /* now adjust the index-1 table */
  701. for (i = 0; i < UNEWTRIE2_INDEX_1_LENGTH; ++i) {
  702. this.index1[i] = this.map[this.index1[i] >> Trie_1.UTRIE2_SHIFT_1_2];
  703. }
  704. this.index2NullOffset = this.map[this.index2NullOffset >> Trie_1.UTRIE2_SHIFT_1_2];
  705. /*
  706. * Ensure data table alignment:
  707. * Needs to be granularity-aligned for 16-bit trie
  708. * (so that dataMove will be down-shiftable),
  709. * and 2-aligned for uint32_t data.
  710. */
  711. while ((newStart & ((UTRIE2_DATA_GRANULARITY - 1) | 1)) !== 0) {
  712. /* Arbitrary value: 0x3fffc not possible for real data. */
  713. this.index2[newStart++] = 0x0000ffff << Trie_1.UTRIE2_INDEX_SHIFT;
  714. }
  715. this.index2Length = newStart;
  716. };
  717. TrieBuilder.prototype.findSameIndex2Block = function (index2Length, otherBlock) {
  718. /* ensure that we do not even partially get past index2Length */
  719. index2Length -= Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  720. for (var block = 0; block <= index2Length; ++block) {
  721. if (equalInt(this.index2, block, otherBlock, Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH)) {
  722. return block;
  723. }
  724. }
  725. return -1;
  726. };
  727. TrieBuilder.prototype._set = function (c, forLSCP, value) {
  728. if (this.isCompacted) {
  729. throw new Error('Trie was already compacted');
  730. }
  731. var block = this.getDataBlock(c, forLSCP);
  732. this.data[block + (c & Trie_1.UTRIE2_DATA_MASK)] = value;
  733. return this;
  734. };
  735. TrieBuilder.prototype.writeBlock = function (block, value) {
  736. var limit = block + Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  737. while (block < limit) {
  738. this.data[block++] = value;
  739. }
  740. };
  741. TrieBuilder.prototype.isInNullBlock = function (c, forLSCP) {
  742. var i2 = isHighSurrogate(c) && forLSCP
  743. ? Trie_1.UTRIE2_LSCP_INDEX_2_OFFSET - (0xd800 >> Trie_1.UTRIE2_SHIFT_2) + (c >> Trie_1.UTRIE2_SHIFT_2)
  744. : this.index1[c >> Trie_1.UTRIE2_SHIFT_1] + ((c >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK);
  745. var block = this.index2[i2];
  746. return block === this.dataNullOffset;
  747. };
  748. TrieBuilder.prototype.fillBlock = function (block, start, limit, value, initialValue, overwrite) {
  749. var pLimit = block + limit;
  750. if (overwrite) {
  751. for (var i = block + start; i < pLimit; i++) {
  752. this.data[i] = value;
  753. }
  754. }
  755. else {
  756. for (var i = block + start; i < pLimit; i++) {
  757. if (this.data[i] === initialValue) {
  758. this.data[i] = value;
  759. }
  760. }
  761. }
  762. };
  763. TrieBuilder.prototype.setIndex2Entry = function (i2, block) {
  764. ++this.map[block >> Trie_1.UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */
  765. var oldBlock = this.index2[i2];
  766. if (0 === --this.map[oldBlock >> Trie_1.UTRIE2_SHIFT_2]) {
  767. this.releaseDataBlock(oldBlock);
  768. }
  769. this.index2[i2] = block;
  770. };
  771. TrieBuilder.prototype.releaseDataBlock = function (block) {
  772. /* put this block at the front of the free-block chain */
  773. this.map[block >> Trie_1.UTRIE2_SHIFT_2] = -this.firstFreeBlock;
  774. this.firstFreeBlock = block;
  775. };
  776. TrieBuilder.prototype.getDataBlock = function (c, forLSCP) {
  777. var i2 = this.getIndex2Block(c, forLSCP);
  778. i2 += (c >> Trie_1.UTRIE2_SHIFT_2) & Trie_1.UTRIE2_INDEX_2_MASK;
  779. var oldBlock = this.index2[i2];
  780. if (this.isWritableBlock(oldBlock)) {
  781. return oldBlock;
  782. }
  783. /* allocate a new data block */
  784. var newBlock = this.allocDataBlock(oldBlock);
  785. this.setIndex2Entry(i2, newBlock);
  786. return newBlock;
  787. };
  788. TrieBuilder.prototype.isWritableBlock = function (block) {
  789. return block !== this.dataNullOffset && 1 === this.map[block >> Trie_1.UTRIE2_SHIFT_2];
  790. };
  791. TrieBuilder.prototype.getIndex2Block = function (c, forLSCP) {
  792. if (c >= 0xd800 && c < 0xdc00 && forLSCP) {
  793. return Trie_1.UTRIE2_LSCP_INDEX_2_OFFSET;
  794. }
  795. var i1 = c >> Trie_1.UTRIE2_SHIFT_1;
  796. var i2 = this.index1[i1];
  797. if (i2 === this.index2NullOffset) {
  798. i2 = this.allocIndex2Block();
  799. this.index1[i1] = i2;
  800. }
  801. return i2;
  802. };
  803. TrieBuilder.prototype.allocDataBlock = function (copyBlock) {
  804. var newBlock;
  805. if (this.firstFreeBlock !== 0) {
  806. /* get the first free block */
  807. newBlock = this.firstFreeBlock;
  808. this.firstFreeBlock = -this.map[newBlock >> Trie_1.UTRIE2_SHIFT_2];
  809. }
  810. else {
  811. /* get a new block from the high end */
  812. newBlock = this.dataLength;
  813. var newTop = newBlock + Trie_1.UTRIE2_DATA_BLOCK_LENGTH;
  814. if (newTop > this.dataCapacity) {
  815. var capacity = void 0;
  816. /* out of memory in the data array */
  817. if (this.dataCapacity < UNEWTRIE2_MEDIUM_DATA_LENGTH) {
  818. capacity = UNEWTRIE2_MEDIUM_DATA_LENGTH;
  819. }
  820. else if (this.dataCapacity < UNEWTRIE2_MAX_DATA_LENGTH) {
  821. capacity = UNEWTRIE2_MAX_DATA_LENGTH;
  822. }
  823. else {
  824. /*
  825. * Should never occur.
  826. * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
  827. * or the code writes more values than should be possible.
  828. */
  829. throw new Error('Internal error in Trie creation.');
  830. }
  831. var newData = new Uint32Array(capacity);
  832. newData.set(this.data.subarray(0, this.dataLength));
  833. this.data = newData;
  834. this.dataCapacity = capacity;
  835. }
  836. this.dataLength = newTop;
  837. }
  838. this.data.set(this.data.subarray(copyBlock, copyBlock + Trie_1.UTRIE2_DATA_BLOCK_LENGTH), newBlock);
  839. this.map[newBlock >> Trie_1.UTRIE2_SHIFT_2] = 0;
  840. return newBlock;
  841. };
  842. TrieBuilder.prototype.allocIndex2Block = function () {
  843. var newBlock = this.index2Length;
  844. var newTop = newBlock + Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH;
  845. if (newTop > this.index2.length) {
  846. throw new Error('Internal error in Trie creation.');
  847. /*
  848. * Should never occur.
  849. * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
  850. * or the code writes more values than should be possible.
  851. */
  852. }
  853. this.index2Length = newTop;
  854. this.index2.set(this.index2.subarray(this.index2NullOffset, this.index2NullOffset + Trie_1.UTRIE2_INDEX_2_BLOCK_LENGTH), newBlock);
  855. return newBlock;
  856. };
  857. return TrieBuilder;
  858. }());
  859. exports.TrieBuilder = TrieBuilder;
  860. var serializeBase64 = function (trie) {
  861. var index = trie.index;
  862. var data = trie.data;
  863. if (!(index instanceof Uint16Array) || !(data instanceof Uint16Array || data instanceof Uint32Array)) {
  864. throw new Error('TrieBuilder serializer only support TypedArrays');
  865. }
  866. var headerLength = Uint32Array.BYTES_PER_ELEMENT * 6;
  867. var bufferLength = headerLength + index.byteLength + data.byteLength;
  868. var buffer = new ArrayBuffer(Math.ceil(bufferLength / 4) * 4);
  869. var view32 = new Uint32Array(buffer);
  870. var view16 = new Uint16Array(buffer);
  871. view32[0] = trie.initialValue;
  872. view32[1] = trie.errorValue;
  873. view32[2] = trie.highStart;
  874. view32[3] = trie.highValueIndex;
  875. view32[4] = index.byteLength;
  876. // $FlowFixMe
  877. view32[5] = data.BYTES_PER_ELEMENT;
  878. view16.set(index, headerLength / Uint16Array.BYTES_PER_ELEMENT);
  879. if (data.BYTES_PER_ELEMENT === Uint16Array.BYTES_PER_ELEMENT) {
  880. view16.set(data, (headerLength + index.byteLength) / Uint16Array.BYTES_PER_ELEMENT);
  881. }
  882. else {
  883. view32.set(data, Math.ceil((headerLength + index.byteLength) / Uint32Array.BYTES_PER_ELEMENT));
  884. }
  885. return [base64_arraybuffer_1.encode(new Uint8Array(buffer)), buffer.byteLength];
  886. };
  887. exports.serializeBase64 = serializeBase64;
  888. //# sourceMappingURL=TrieBuilder.js.map