LineBreak.js 20 KB


  1. 'use strict';
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.LineBreaker = exports.inlineBreakOpportunities = exports.lineBreakAtIndex = exports.codePointsToCharacterClasses = exports.UnicodeTrie = exports.BREAK_ALLOWED = exports.BREAK_NOT_ALLOWED = exports.BREAK_MANDATORY = exports.classes = exports.LETTER_NUMBER_MODIFIER = void 0;
  4. var utrie_1 = require("utrie");
  5. var linebreak_trie_1 = require("./linebreak-trie");
  6. var Util_1 = require("./Util");
  7. exports.LETTER_NUMBER_MODIFIER = 50;
  8. // Non-tailorable Line Breaking Classes
  9. var BK = 1; // Cause a line break (after)
  10. var CR = 2; // Cause a line break (after), except between CR and LF
  11. var LF = 3; // Cause a line break (after)
  12. var CM = 4; // Prohibit a line break between the character and the preceding character
  13. var NL = 5; // Cause a line break (after)
  14. var SG = 6; // Do not occur in well-formed text
  15. var WJ = 7; // Prohibit line breaks before and after
  16. var ZW = 8; // Provide a break opportunity
  17. var GL = 9; // Prohibit line breaks before and after
  18. var SP = 10; // Enable indirect line breaks
  19. var ZWJ = 11; // Prohibit line breaks within joiner sequences
  20. // Break Opportunities
  21. var B2 = 12; // Provide a line break opportunity before and after the character
  22. var BA = 13; // Generally provide a line break opportunity after the character
  23. var BB = 14; // Generally provide a line break opportunity before the character
  24. var HY = 15; // Provide a line break opportunity after the character, except in numeric context
  25. var CB = 16; // Provide a line break opportunity contingent on additional information
  26. // Characters Prohibiting Certain Breaks
  27. var CL = 17; // Prohibit line breaks before
  28. var CP = 18; // Prohibit line breaks before
  29. var EX = 19; // Prohibit line breaks before
  30. var IN = 20; // Allow only indirect line breaks between pairs
  31. var NS = 21; // Allow only indirect line breaks before
  32. var OP = 22; // Prohibit line breaks after
  33. var QU = 23; // Act like they are both opening and closing
  34. // Numeric Context
  35. var IS = 24; // Prevent breaks after any and before numeric
  36. var NU = 25; // Form numeric expressions for line breaking purposes
  37. var PO = 26; // Do not break following a numeric expression
  38. var PR = 27; // Do not break in front of a numeric expression
  39. var SY = 28; // Prevent a break before; and allow a break after
  40. // Other Characters
  41. var AI = 29; // Act like AL when the resolvedEAW is N; otherwise; act as ID
  42. var AL = 30; // Are alphabetic characters or symbols that are used with alphabetic characters
  43. var CJ = 31; // Treat as NS or ID for strict or normal breaking.
  44. var EB = 32; // Do not break from following Emoji Modifier
  45. var EM = 33; // Do not break from preceding Emoji Base
  46. var H2 = 34; // Form Korean syllable blocks
  47. var H3 = 35; // Form Korean syllable blocks
  48. var HL = 36; // Do not break around a following hyphen; otherwise act as Alphabetic
  49. var ID = 37; // Break before or after; except in some numeric context
  50. var JL = 38; // Form Korean syllable blocks
  51. var JV = 39; // Form Korean syllable blocks
  52. var JT = 40; // Form Korean syllable blocks
  53. var RI = 41; // Keep pairs together. For pairs; break before and after other classes
  54. var SA = 42; // Provide a line break opportunity contingent on additional, language-specific context analysis
  55. var XX = 43; // Have as yet unknown line breaking behavior or unassigned code positions
  56. var ea_OP = [0x2329, 0xff08];
  57. exports.classes = {
  58. BK: BK,
  59. CR: CR,
  60. LF: LF,
  61. CM: CM,
  62. NL: NL,
  63. SG: SG,
  64. WJ: WJ,
  65. ZW: ZW,
  66. GL: GL,
  67. SP: SP,
  68. ZWJ: ZWJ,
  69. B2: B2,
  70. BA: BA,
  71. BB: BB,
  72. HY: HY,
  73. CB: CB,
  74. CL: CL,
  75. CP: CP,
  76. EX: EX,
  77. IN: IN,
  78. NS: NS,
  79. OP: OP,
  80. QU: QU,
  81. IS: IS,
  82. NU: NU,
  83. PO: PO,
  84. PR: PR,
  85. SY: SY,
  86. AI: AI,
  87. AL: AL,
  88. CJ: CJ,
  89. EB: EB,
  90. EM: EM,
  91. H2: H2,
  92. H3: H3,
  93. HL: HL,
  94. ID: ID,
  95. JL: JL,
  96. JV: JV,
  97. JT: JT,
  98. RI: RI,
  99. SA: SA,
  100. XX: XX,
  101. };
  102. exports.BREAK_MANDATORY = '!';
  103. exports.BREAK_NOT_ALLOWED = '×';
  104. exports.BREAK_ALLOWED = '÷';
  105. exports.UnicodeTrie = utrie_1.createTrieFromBase64(linebreak_trie_1.base64, linebreak_trie_1.byteLength);
  106. var ALPHABETICS = [AL, HL];
  107. var HARD_LINE_BREAKS = [BK, CR, LF, NL];
  108. var SPACE = [SP, ZW];
  109. var PREFIX_POSTFIX = [PR, PO];
  110. var LINE_BREAKS = HARD_LINE_BREAKS.concat(SPACE);
  111. var KOREAN_SYLLABLE_BLOCK = [JL, JV, JT, H2, H3];
  112. var HYPHEN = [HY, BA];
  113. var codePointsToCharacterClasses = function (codePoints, lineBreak) {
  114. if (lineBreak === void 0) { lineBreak = 'strict'; }
  115. var types = [];
  116. var indices = [];
  117. var categories = [];
  118. codePoints.forEach(function (codePoint, index) {
  119. var classType = exports.UnicodeTrie.get(codePoint);
  120. if (classType > exports.LETTER_NUMBER_MODIFIER) {
  121. categories.push(true);
  122. classType -= exports.LETTER_NUMBER_MODIFIER;
  123. }
  124. else {
  125. categories.push(false);
  126. }
  127. if (['normal', 'auto', 'loose'].indexOf(lineBreak) !== -1) {
  128. // U+2010, – U+2013, 〜 U+301C, ゠ U+30A0
  129. if ([0x2010, 0x2013, 0x301c, 0x30a0].indexOf(codePoint) !== -1) {
  130. indices.push(index);
  131. return types.push(CB);
  132. }
  133. }
  134. if (classType === CM || classType === ZWJ) {
  135. // LB10 Treat any remaining combining mark or ZWJ as AL.
  136. if (index === 0) {
  137. indices.push(index);
  138. return types.push(AL);
  139. }
  140. // LB9 Do not break a combining character sequence; treat it as if it has the line breaking class of
  141. // the base character in all of the following rules. Treat ZWJ as if it were CM.
  142. var prev = types[index - 1];
  143. if (LINE_BREAKS.indexOf(prev) === -1) {
  144. indices.push(indices[index - 1]);
  145. return types.push(prev);
  146. }
  147. indices.push(index);
  148. return types.push(AL);
  149. }
  150. indices.push(index);
  151. if (classType === CJ) {
  152. return types.push(lineBreak === 'strict' ? NS : ID);
  153. }
  154. if (classType === SA) {
  155. return types.push(AL);
  156. }
  157. if (classType === AI) {
  158. return types.push(AL);
  159. }
  160. // For supplementary characters, a useful default is to treat characters in the range 10000..1FFFD as AL
  161. // and characters in the ranges 20000..2FFFD and 30000..3FFFD as ID, until the implementation can be revised
  162. // to take into account the actual line breaking properties for these characters.
  163. if (classType === XX) {
  164. if ((codePoint >= 0x20000 && codePoint <= 0x2fffd) || (codePoint >= 0x30000 && codePoint <= 0x3fffd)) {
  165. return types.push(ID);
  166. }
  167. else {
  168. return types.push(AL);
  169. }
  170. }
  171. types.push(classType);
  172. });
  173. return [indices, types, categories];
  174. };
  175. exports.codePointsToCharacterClasses = codePointsToCharacterClasses;
  176. var isAdjacentWithSpaceIgnored = function (a, b, currentIndex, classTypes) {
  177. var current = classTypes[currentIndex];
  178. if (Array.isArray(a) ? a.indexOf(current) !== -1 : a === current) {
  179. var i = currentIndex;
  180. while (i <= classTypes.length) {
  181. i++;
  182. var next = classTypes[i];
  183. if (next === b) {
  184. return true;
  185. }
  186. if (next !== SP) {
  187. break;
  188. }
  189. }
  190. }
  191. if (current === SP) {
  192. var i = currentIndex;
  193. while (i > 0) {
  194. i--;
  195. var prev = classTypes[i];
  196. if (Array.isArray(a) ? a.indexOf(prev) !== -1 : a === prev) {
  197. var n = currentIndex;
  198. while (n <= classTypes.length) {
  199. n++;
  200. var next = classTypes[n];
  201. if (next === b) {
  202. return true;
  203. }
  204. if (next !== SP) {
  205. break;
  206. }
  207. }
  208. }
  209. if (prev !== SP) {
  210. break;
  211. }
  212. }
  213. }
  214. return false;
  215. };
  216. var previousNonSpaceClassType = function (currentIndex, classTypes) {
  217. var i = currentIndex;
  218. while (i >= 0) {
  219. var type = classTypes[i];
  220. if (type === SP) {
  221. i--;
  222. }
  223. else {
  224. return type;
  225. }
  226. }
  227. return 0;
  228. };
  // Decides what kind of break sits immediately before position `index`, i.e. between
  // classTypes[index - 1] (`current`) and classTypes[index] (`next`).
  //
  // Parameters:
  //   codePoints      - code points of the text; read for raw trie lookups and the ea_OP check
  //   classTypes      - resolved line breaking class per position
  //   indicies        - anchor index per position (see the start-of-text guard below)
  //   index           - position the break would precede
  //   forbiddenBreaks - optional array; a `true` entry at index - 1 vetoes the break
  //
  // Returns exports.BREAK_MANDATORY, exports.BREAK_NOT_ALLOWED or exports.BREAK_ALLOWED.
  // The rules are tried top to bottom and the first one that matches decides the result,
  // so their order is significant. When nothing matches, the break is allowed.
  229. var _lineBreakAtIndex = function (codePoints, classTypes, indicies, index, forbiddenBreaks) {
  // An anchor of 0 means this position is the start of the text or belongs to the entry at
  // index 0 (presumably a combining sequence merged into it; verify against the producer of `indicies`).
  230. if (indicies[index] === 0) {
  231. return exports.BREAK_NOT_ALLOWED;
  232. }
  233. var currentIndex = index - 1;
  // Caller-supplied veto for the position right before the break.
  234. if (Array.isArray(forbiddenBreaks) && forbiddenBreaks[currentIndex] === true) {
  235. return exports.BREAK_NOT_ALLOWED;
  236. }
  237. var beforeIndex = currentIndex - 1;
  238. var afterIndex = currentIndex + 1;
  239. var current = classTypes[currentIndex];
  240. // LB4 Always break after hard line breaks.
  241. // LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
  // `before` is the class two positions ahead of the break, or 0 when there is none.
  242. var before = beforeIndex >= 0 ? classTypes[beforeIndex] : 0;
  243. var next = classTypes[afterIndex];
  244. if (current === CR && next === LF) {
  245. return exports.BREAK_NOT_ALLOWED;
  246. }
  247. if (HARD_LINE_BREAKS.indexOf(current) !== -1) {
  248. return exports.BREAK_MANDATORY;
  249. }
  250. // LB6 Do not break before hard line breaks.
  251. if (HARD_LINE_BREAKS.indexOf(next) !== -1) {
  252. return exports.BREAK_NOT_ALLOWED;
  253. }
  254. // LB7 Do not break before spaces or zero width space.
  255. if (SPACE.indexOf(next) !== -1) {
  256. return exports.BREAK_NOT_ALLOWED;
  257. }
  258. // LB8 Break before any character following a zero-width space, even if one or more spaces intervene.
  259. if (previousNonSpaceClassType(currentIndex, classTypes) === ZW) {
  260. return exports.BREAK_ALLOWED;
  261. }
  262. // LB8a Do not break after a zero width joiner.
  // The raw trie class of the code point is checked here rather than `classTypes`.
  263. if (exports.UnicodeTrie.get(codePoints[currentIndex]) === ZWJ) {
  264. return exports.BREAK_NOT_ALLOWED;
  265. }
  266. // zwj emojis
  // An emoji base or modifier directly followed by a ZWJ code point stays attached to it.
  267. if ((current === EB || current === EM) && exports.UnicodeTrie.get(codePoints[afterIndex]) === ZWJ) {
  268. return exports.BREAK_NOT_ALLOWED;
  269. }
  270. // LB11 Do not break before or after Word joiner and related characters.
  271. if (current === WJ || next === WJ) {
  272. return exports.BREAK_NOT_ALLOWED;
  273. }
  274. // LB12 Do not break after NBSP and related characters.
  275. if (current === GL) {
  276. return exports.BREAK_NOT_ALLOWED;
  277. }
  278. // LB12a Do not break before NBSP and related characters, except after spaces and hyphens.
  279. if ([SP, BA, HY].indexOf(current) === -1 && next === GL) {
  280. return exports.BREAK_NOT_ALLOWED;
  281. }
  282. // LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
  283. if ([CL, CP, EX, IS, SY].indexOf(next) !== -1) {
  284. return exports.BREAK_NOT_ALLOWED;
  285. }
  286. // LB14 Do not break after ‘[’, even after spaces.
  287. if (previousNonSpaceClassType(currentIndex, classTypes) === OP) {
  288. return exports.BREAK_NOT_ALLOWED;
  289. }
  290. // LB15 Do not break within ‘”[’, even with intervening spaces.
  291. if (isAdjacentWithSpaceIgnored(QU, OP, currentIndex, classTypes)) {
  292. return exports.BREAK_NOT_ALLOWED;
  293. }
  294. // LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.
  295. if (isAdjacentWithSpaceIgnored([CL, CP], NS, currentIndex, classTypes)) {
  296. return exports.BREAK_NOT_ALLOWED;
  297. }
  298. // LB17 Do not break within ‘——’, even with intervening spaces.
  299. if (isAdjacentWithSpaceIgnored(B2, B2, currentIndex, classTypes)) {
  300. return exports.BREAK_NOT_ALLOWED;
  301. }
  302. // LB18 Break after spaces.
  303. if (current === SP) {
  304. return exports.BREAK_ALLOWED;
  305. }
  306. // LB19 Do not break before or after quotation marks, such as ‘ ” ’.
  307. if (current === QU || next === QU) {
  308. return exports.BREAK_NOT_ALLOWED;
  309. }
  310. // LB20 Break before and after unresolved CB.
  311. if (next === CB || current === CB) {
  312. return exports.BREAK_ALLOWED;
  313. }
  314. // LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents.
  315. if ([BA, HY, NS].indexOf(next) !== -1 || current === BB) {
  316. return exports.BREAK_NOT_ALLOWED;
  317. }
  318. // LB21a Don't break after Hebrew + Hyphen.
  319. if (before === HL && HYPHEN.indexOf(current) !== -1) {
  320. return exports.BREAK_NOT_ALLOWED;
  321. }
  322. // LB21b Don’t break between Solidus and Hebrew letters.
  323. if (current === SY && next === HL) {
  324. return exports.BREAK_NOT_ALLOWED;
  325. }
  326. // LB22 Do not break before ellipsis.
  327. if (next === IN) {
  328. return exports.BREAK_NOT_ALLOWED;
  329. }
  330. // LB23 Do not break between digits and letters.
  331. if ((ALPHABETICS.indexOf(next) !== -1 && current === NU) || (ALPHABETICS.indexOf(current) !== -1 && next === NU)) {
  332. return exports.BREAK_NOT_ALLOWED;
  333. }
  334. // LB23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
  335. if ((current === PR && [ID, EB, EM].indexOf(next) !== -1) ||
  336. ([ID, EB, EM].indexOf(current) !== -1 && next === PO)) {
  337. return exports.BREAK_NOT_ALLOWED;
  338. }
  339. // LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.
  340. if ((ALPHABETICS.indexOf(current) !== -1 && PREFIX_POSTFIX.indexOf(next) !== -1) ||
  341. (PREFIX_POSTFIX.indexOf(current) !== -1 && ALPHABETICS.indexOf(next) !== -1)) {
  342. return exports.BREAK_NOT_ALLOWED;
  343. }
  344. // LB25 Do not break between the following pairs of classes relevant to numbers:
  345. if (
  346. // (PR | PO) × ( OP | HY )? NU
  347. ([PR, PO].indexOf(current) !== -1 &&
  348. (next === NU || ([OP, HY].indexOf(next) !== -1 && classTypes[afterIndex + 1] === NU))) ||
  349. // ( OP | HY ) × NU
  350. ([OP, HY].indexOf(current) !== -1 && next === NU) ||
  351. // NU × (NU | SY | IS)
  352. (current === NU && [NU, SY, IS].indexOf(next) !== -1)) {
  353. return exports.BREAK_NOT_ALLOWED;
  354. }
  355. // NU (NU | SY | IS)* × (NU | SY | IS | CL | CP)
  // Walk back over SY/IS entries; the break is forbidden only if the run starts at a NU.
  356. if ([NU, SY, IS, CL, CP].indexOf(next) !== -1) {
  357. var prevIndex = currentIndex;
  358. while (prevIndex >= 0) {
  359. var type = classTypes[prevIndex];
  360. if (type === NU) {
  361. return exports.BREAK_NOT_ALLOWED;
  362. }
  363. else if ([SY, IS].indexOf(type) !== -1) {
  364. prevIndex--;
  365. }
  366. else {
  367. break;
  368. }
  369. }
  370. }
  371. // NU (NU | SY | IS)* (CL | CP)? × (PO | PR)
  // Same backwards walk, first skipping a single optional CL/CP at `current`.
  372. if ([PR, PO].indexOf(next) !== -1) {
  373. var prevIndex = [CL, CP].indexOf(current) !== -1 ? beforeIndex : currentIndex;
  374. while (prevIndex >= 0) {
  375. var type = classTypes[prevIndex];
  376. if (type === NU) {
  377. return exports.BREAK_NOT_ALLOWED;
  378. }
  379. else if ([SY, IS].indexOf(type) !== -1) {
  380. prevIndex--;
  381. }
  382. else {
  383. break;
  384. }
  385. }
  386. }
  387. // LB26 Do not break a Korean syllable.
  388. if ((JL === current && [JL, JV, H2, H3].indexOf(next) !== -1) ||
  389. ([JV, H2].indexOf(current) !== -1 && [JV, JT].indexOf(next) !== -1) ||
  390. ([JT, H3].indexOf(current) !== -1 && next === JT)) {
  391. return exports.BREAK_NOT_ALLOWED;
  392. }
  393. // LB27 Treat a Korean Syllable Block the same as ID.
  394. if ((KOREAN_SYLLABLE_BLOCK.indexOf(current) !== -1 && [IN, PO].indexOf(next) !== -1) ||
  395. (KOREAN_SYLLABLE_BLOCK.indexOf(next) !== -1 && current === PR)) {
  396. return exports.BREAK_NOT_ALLOWED;
  397. }
  398. // LB28 Do not break between alphabetics (“at”).
  399. if (ALPHABETICS.indexOf(current) !== -1 && ALPHABETICS.indexOf(next) !== -1) {
  400. return exports.BREAK_NOT_ALLOWED;
  401. }
  402. // LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
  403. if (current === IS && ALPHABETICS.indexOf(next) !== -1) {
  404. return exports.BREAK_NOT_ALLOWED;
  405. }
  406. // LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
  // An OP whose code point is listed in ea_OP is exempt from the "before OP" half of this rule.
  407. if ((ALPHABETICS.concat(NU).indexOf(current) !== -1 &&
  408. next === OP &&
  409. ea_OP.indexOf(codePoints[afterIndex]) === -1) ||
  410. (ALPHABETICS.concat(NU).indexOf(next) !== -1 && current === CP)) {
  411. return exports.BREAK_NOT_ALLOWED;
  412. }
  413. // LB30a Break between two regional indicator symbols if and only if there are an even number of regional
  414. // indicators preceding the position of the break.
  // `count` starts at 1 for `current` and grows for each consecutive RI found while walking
  // back from its anchor index; an odd total forbids the break.
  415. if (current === RI && next === RI) {
  416. var i = indicies[currentIndex];
  417. var count = 1;
  418. while (i > 0) {
  419. i--;
  420. if (classTypes[i] === RI) {
  421. count++;
  422. }
  423. else {
  424. break;
  425. }
  426. }
  427. if (count % 2 !== 0) {
  428. return exports.BREAK_NOT_ALLOWED;
  429. }
  430. }
  431. // LB30b Do not break between an emoji base and an emoji modifier.
  432. if (current === EB && next === EM) {
  433. return exports.BREAK_NOT_ALLOWED;
  434. }
  // No rule forbade the break, so it is allowed.
  435. return exports.BREAK_ALLOWED;
  436. };
  437. var lineBreakAtIndex = function (codePoints, index) {
  438. // LB2 Never break at the start of text.
  439. if (index === 0) {
  440. return exports.BREAK_NOT_ALLOWED;
  441. }
  442. // LB3 Always break at the end of text.
  443. if (index >= codePoints.length) {
  444. return exports.BREAK_MANDATORY;
  445. }
  446. var _a = exports.codePointsToCharacterClasses(codePoints), indices = _a[0], classTypes = _a[1];
  447. return _lineBreakAtIndex(codePoints, classTypes, indices, index);
  448. };
  449. exports.lineBreakAtIndex = lineBreakAtIndex;
  450. var cssFormattedClasses = function (codePoints, options) {
  451. if (!options) {
  452. options = { lineBreak: 'normal', wordBreak: 'normal' };
  453. }
  454. var _a = exports.codePointsToCharacterClasses(codePoints, options.lineBreak), indicies = _a[0], classTypes = _a[1], isLetterNumber = _a[2];
  455. if (options.wordBreak === 'break-all' || options.wordBreak === 'break-word') {
  456. classTypes = classTypes.map(function (type) { return ([NU, AL, SA].indexOf(type) !== -1 ? ID : type); });
  457. }
  458. var forbiddenBreakpoints = options.wordBreak === 'keep-all'
  459. ? isLetterNumber.map(function (letterNumber, i) {
  460. return letterNumber && codePoints[i] >= 0x4e00 && codePoints[i] <= 0x9fff;
  461. })
  462. : undefined;
  463. return [indicies, classTypes, forbiddenBreakpoints];
  464. };
  465. var inlineBreakOpportunities = function (str, options) {
  466. var codePoints = Util_1.toCodePoints(str);
  467. var output = exports.BREAK_NOT_ALLOWED;
  468. var _a = cssFormattedClasses(codePoints, options), indicies = _a[0], classTypes = _a[1], forbiddenBreakpoints = _a[2];
  469. codePoints.forEach(function (codePoint, i) {
  470. output +=
  471. Util_1.fromCodePoint(codePoint) +
  472. (i >= codePoints.length - 1
  473. ? exports.BREAK_MANDATORY
  474. : _lineBreakAtIndex(codePoints, classTypes, indicies, i + 1, forbiddenBreakpoints));
  475. });
  476. return output;
  477. };
  478. exports.inlineBreakOpportunities = inlineBreakOpportunities;
  479. var Break = /** @class */ (function () {
  480. function Break(codePoints, lineBreak, start, end) {
  481. this.codePoints = codePoints;
  482. this.required = lineBreak === exports.BREAK_MANDATORY;
  483. this.start = start;
  484. this.end = end;
  485. }
  486. Break.prototype.slice = function () {
  487. return Util_1.fromCodePoint.apply(void 0, this.codePoints.slice(this.start, this.end));
  488. };
  489. return Break;
  490. }());
  491. var LineBreaker = function (str, options) {
  492. var codePoints = Util_1.toCodePoints(str);
  493. var _a = cssFormattedClasses(codePoints, options), indicies = _a[0], classTypes = _a[1], forbiddenBreakpoints = _a[2];
  494. var length = codePoints.length;
  495. var lastEnd = 0;
  496. var nextIndex = 0;
  497. return {
  498. next: function () {
  499. if (nextIndex >= length) {
  500. return { done: true, value: null };
  501. }
  502. var lineBreak = exports.BREAK_NOT_ALLOWED;
  503. while (nextIndex < length &&
  504. (lineBreak = _lineBreakAtIndex(codePoints, classTypes, indicies, ++nextIndex, forbiddenBreakpoints)) ===
  505. exports.BREAK_NOT_ALLOWED) { }
  506. if (lineBreak !== exports.BREAK_NOT_ALLOWED || nextIndex === length) {
  507. var value = new Break(codePoints, lineBreak, lastEnd, nextIndex);
  508. lastEnd = nextIndex;
  509. return { value: value, done: false };
  510. }
  511. return { done: true, value: null };
  512. },
  513. };
  514. };
  515. exports.LineBreaker = LineBreaker;
  516. //# sourceMappingURL=LineBreak.js.map