// parse.js

var Tokenizer = require('./tokenizer');

var TAB = 9;                  // \t
var N = 10;                   // \n
var F = 12;                   // \f
var R = 13;                   // \r
var SPACE = 32;
var EXCLAMATIONMARK = 33;     // !
var NUMBERSIGN = 35;          // #
var AMPERSAND = 38;           // &
var APOSTROPHE = 39;          // '
var LEFTPARENTHESIS = 40;     // (
var RIGHTPARENTHESIS = 41;    // )
var ASTERISK = 42;            // *
var PLUSSIGN = 43;            // +
var COMMA = 44;               // ,
var HYPERMINUS = 45;          // -
var LESSTHANSIGN = 60;        // <
var GREATERTHANSIGN = 62;     // >
var QUESTIONMARK = 63;        // ?
var COMMERCIALAT = 64;        // @
var LEFTSQUAREBRACKET = 91;   // [
var RIGHTSQUAREBRACKET = 93;  // ]
var LEFTCURLYBRACKET = 123;   // {
var VERTICALLINE = 124;       // |
var RIGHTCURLYBRACKET = 125;  // }
var INFINITY = 8734;          // ∞
var NAME_CHAR = createCharMap(function(ch) {
    return /[a-zA-Z0-9\-]/.test(ch);
});
var COMBINATOR_PRECEDENCE = {
    ' ': 1,
    '&&': 2,
    '||': 3,
    '|': 4
};

function createCharMap(fn) {
    var array = typeof Uint32Array === 'function' ? new Uint32Array(128) : new Array(128);

    for (var i = 0; i < 128; i++) {
        array[i] = fn(String.fromCharCode(i)) ? 1 : 0;
    }

    return array;
}
function scanSpaces(tokenizer) {
    return tokenizer.substringToPos(
        tokenizer.findWsEnd(tokenizer.pos)
    );
}

function scanWord(tokenizer) {
    var end = tokenizer.pos;

    for (; end < tokenizer.str.length; end++) {
        var code = tokenizer.str.charCodeAt(end);

        if (code >= 128 || NAME_CHAR[code] === 0) {
            break;
        }
    }

    if (tokenizer.pos === end) {
        tokenizer.error('Expect a keyword');
    }

    return tokenizer.substringToPos(end);
}

function scanNumber(tokenizer) {
    var end = tokenizer.pos;

    for (; end < tokenizer.str.length; end++) {
        var code = tokenizer.str.charCodeAt(end);

        if (code < 48 || code > 57) {
            break;
        }
    }

    if (tokenizer.pos === end) {
        tokenizer.error('Expect a number');
    }

    return tokenizer.substringToPos(end);
}

function scanString(tokenizer) {
    var end = tokenizer.str.indexOf('\'', tokenizer.pos + 1);

    if (end === -1) {
        tokenizer.pos = tokenizer.str.length;
        tokenizer.error('Expect an apostrophe');
    }

    return tokenizer.substringToPos(end + 1);
}

function readMultiplierRange(tokenizer) {
    var min = null;
    var max = null;

    tokenizer.eat(LEFTCURLYBRACKET);

    min = scanNumber(tokenizer);

    if (tokenizer.charCode() === COMMA) {
        tokenizer.pos++;
        if (tokenizer.charCode() !== RIGHTCURLYBRACKET) {
            max = scanNumber(tokenizer);
        }
    } else {
        max = min;
    }

    tokenizer.eat(RIGHTCURLYBRACKET);

    return {
        min: Number(min),
        max: max ? Number(max) : 0
    };
}
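
// For illustration: readMultiplierRange maps brace notation onto { min, max },
// where max: 0 stands for "no upper bound", e.g.
//   {2}   -> { min: 2, max: 2 }
//   {1,4} -> { min: 1, max: 4 }
//   {2,}  -> { min: 2, max: 0 }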
function readMultiplier(tokenizer) {
    var range = null;
    var comma = false;

    switch (tokenizer.charCode()) {
        case ASTERISK:
            tokenizer.pos++;
            range = {
                min: 0,
                max: 0
            };
            break;

        case PLUSSIGN:
            tokenizer.pos++;
            range = {
                min: 1,
                max: 0
            };
            break;

        case QUESTIONMARK:
            tokenizer.pos++;
            range = {
                min: 0,
                max: 1
            };
            break;

        case NUMBERSIGN:
            tokenizer.pos++;
            comma = true;
            if (tokenizer.charCode() === LEFTCURLYBRACKET) {
                range = readMultiplierRange(tokenizer);
            } else {
                range = {
                    min: 1,
                    max: 0
                };
            }
            break;

        case LEFTCURLYBRACKET:
            range = readMultiplierRange(tokenizer);
            break;

        default:
            return null;
    }

    return {
        type: 'Multiplier',
        comma: comma,
        min: range.min,
        max: range.max,
        term: null
    };
}
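
// For illustration: how multiplier characters map onto Multiplier nodes
// (max: 0 means "no upper bound"); the term itself is attached by maybeMultiplied():
//   a*      -> { comma: false, min: 0, max: 0 }
//   a+      -> { comma: false, min: 1, max: 0 }
//   a?      -> { comma: false, min: 0, max: 1 }
//   a#      -> { comma: true,  min: 1, max: 0 }
//   a#{1,4} -> { comma: true,  min: 1, max: 4 }
//   a{2}    -> { comma: false, min: 2, max: 2 }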
function maybeMultiplied(tokenizer, node) {
    var multiplier = readMultiplier(tokenizer);

    if (multiplier !== null) {
        multiplier.term = node;
        return multiplier;
    }

    return node;
}

function maybeToken(tokenizer) {
    var ch = tokenizer.peek();

    if (ch === '') {
        return null;
    }

    return {
        type: 'Token',
        value: ch
    };
}

function readProperty(tokenizer) {
    var name;

    tokenizer.eat(LESSTHANSIGN);
    tokenizer.eat(APOSTROPHE);

    name = scanWord(tokenizer);

    tokenizer.eat(APOSTROPHE);
    tokenizer.eat(GREATERTHANSIGN);

    return maybeMultiplied(tokenizer, {
        type: 'Property',
        name: name
    });
}
// https://drafts.csswg.org/css-values-3/#numeric-ranges
// 4.1. Range Restrictions and Range Definition Notation
//
// Range restrictions can be annotated in the numeric type notation using CSS bracketed
// range notation—[min,max]—within the angle brackets, after the identifying keyword,
// indicating a closed range between (and including) min and max.
// For example, <integer [0, 10]> indicates an integer between 0 and 10, inclusive.
function readTypeRange(tokenizer) {
    // use null for Infinity to make AST format JSON serializable/deserializable
    var min = null; // -Infinity
    var max = null; // Infinity
    var sign = 1;

    tokenizer.eat(LEFTSQUAREBRACKET);

    if (tokenizer.charCode() === HYPERMINUS) {
        tokenizer.peek();
        sign = -1;
    }

    if (sign == -1 && tokenizer.charCode() === INFINITY) {
        tokenizer.peek();
    } else {
        min = sign * Number(scanNumber(tokenizer));
    }

    scanSpaces(tokenizer);
    tokenizer.eat(COMMA);
    scanSpaces(tokenizer);

    if (tokenizer.charCode() === INFINITY) {
        tokenizer.peek();
    } else {
        sign = 1;

        if (tokenizer.charCode() === HYPERMINUS) {
            tokenizer.peek();
            sign = -1;
        }

        max = sign * Number(scanNumber(tokenizer));
    }

    tokenizer.eat(RIGHTSQUAREBRACKET);

    // If no range is indicated, either by using the bracketed range notation
    // or in the property description, then [−∞,∞] is assumed.
    if (min === null && max === null) {
        return null;
    }

    return {
        type: 'Range',
        min: min,
        max: max
    };
}
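
// For illustration: how bracketed ranges map onto Range nodes (null stands in
// for -Infinity/Infinity so the AST stays JSON-serializable):
//   [0,10] -> { type: 'Range', min: 0,    max: 10 }
//   [1,∞]  -> { type: 'Range', min: 1,    max: null }
//   [-∞,0] -> { type: 'Range', min: null, max: 0 }
//   [-∞,∞] -> null (no range restriction)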
function readType(tokenizer) {
    var name;
    var opts = null;

    tokenizer.eat(LESSTHANSIGN);
    name = scanWord(tokenizer);

    if (tokenizer.charCode() === LEFTPARENTHESIS &&
        tokenizer.nextCharCode() === RIGHTPARENTHESIS) {
        tokenizer.pos += 2;
        name += '()';
    }

    if (tokenizer.charCodeAt(tokenizer.findWsEnd(tokenizer.pos)) === LEFTSQUAREBRACKET) {
        scanSpaces(tokenizer);
        opts = readTypeRange(tokenizer);
    }

    tokenizer.eat(GREATERTHANSIGN);

    return maybeMultiplied(tokenizer, {
        type: 'Type',
        name: name,
        opts: opts
    });
}

function readKeywordOrFunction(tokenizer) {
    var name;

    name = scanWord(tokenizer);

    if (tokenizer.charCode() === LEFTPARENTHESIS) {
        tokenizer.pos++;

        return {
            type: 'Function',
            name: name
        };
    }

    return maybeMultiplied(tokenizer, {
        type: 'Keyword',
        name: name
    });
}
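
// For illustration: a bare name becomes a Keyword (possibly wrapped in a
// Multiplier), while a name followed by '(' becomes a Function node; the opening
// parenthesis is consumed here, so the arguments and the closing ')' are read as
// ordinary terms of the enclosing group:
//   red  -> { type: 'Keyword', name: 'red' }
//   rgb( -> { type: 'Function', name: 'rgb' }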
function regroupTerms(terms, combinators) {
    function createGroup(terms, combinator) {
        return {
            type: 'Group',
            terms: terms,
            combinator: combinator,
            disallowEmpty: false,
            explicit: false
        };
    }

    combinators = Object.keys(combinators).sort(function(a, b) {
        return COMBINATOR_PRECEDENCE[a] - COMBINATOR_PRECEDENCE[b];
    });

    while (combinators.length > 0) {
        var combinator = combinators.shift();

        for (var i = 0, subgroupStart = -1; i < terms.length; i++) {
            var term = terms[i];

            if (term.type === 'Combinator') {
                if (term.value === combinator) {
                    if (subgroupStart === -1) {
                        subgroupStart = i - 1;
                    }
                    terms.splice(i, 1);
                    i--;
                } else {
                    if (subgroupStart !== -1 && i - subgroupStart > 1) {
                        terms.splice(
                            subgroupStart,
                            i - subgroupStart,
                            createGroup(terms.slice(subgroupStart, i), combinator)
                        );
                        i = subgroupStart + 1;
                    }
                    subgroupStart = -1;
                }
            }
        }

        if (subgroupStart !== -1 && combinators.length) {
            terms.splice(
                subgroupStart,
                i - subgroupStart,
                createGroup(terms.slice(subgroupStart, i), combinator)
            );
        }
    }

    return combinator;
}
function readImplicitGroup(tokenizer) {
    var terms = [];
    var combinators = {};
    var token;
    var prevToken = null;
    var prevTokenPos = tokenizer.pos;

    while (token = peek(tokenizer)) {
        if (token.type !== 'Spaces') {
            if (token.type === 'Combinator') {
                // check for combinator in group beginning and double combinator sequence
                if (prevToken === null || prevToken.type === 'Combinator') {
                    tokenizer.pos = prevTokenPos;
                    tokenizer.error('Unexpected combinator');
                }

                combinators[token.value] = true;
            } else if (prevToken !== null && prevToken.type !== 'Combinator') {
                combinators[' '] = true; // a b
                terms.push({
                    type: 'Combinator',
                    value: ' '
                });
            }

            terms.push(token);
            prevToken = token;
            prevTokenPos = tokenizer.pos;
        }
    }

    // check for combinator in group ending
    if (prevToken !== null && prevToken.type === 'Combinator') {
        tokenizer.pos = prevTokenPos;
        tokenizer.error('Unexpected combinator');
    }

    return {
        type: 'Group',
        terms: terms,
        combinator: regroupTerms(terms, combinators) || ' ',
        disallowEmpty: false,
        explicit: false
    };
}
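
// For illustration: combinators are regrouped by precedence (juxtaposition binds
// tightest, then '&&', '||', '|'), so a definition like
//   a b && c
// becomes a Group with combinator '&&' whose first term is the juxtaposition
// group "a b":
//   { type: 'Group', combinator: '&&',
//     terms: [ { type: 'Group', combinator: ' ', terms: [a, b], ... }, c ] }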
function readGroup(tokenizer) {
    var result;

    tokenizer.eat(LEFTSQUAREBRACKET);
    result = readImplicitGroup(tokenizer);
    tokenizer.eat(RIGHTSQUAREBRACKET);

    result.explicit = true;

    if (tokenizer.charCode() === EXCLAMATIONMARK) {
        tokenizer.pos++;
        result.disallowEmpty = true;
    }

    return result;
}
function peek(tokenizer) {
    var code = tokenizer.charCode();

    if (code < 128 && NAME_CHAR[code] === 1) {
        return readKeywordOrFunction(tokenizer);
    }

    switch (code) {
        case RIGHTSQUAREBRACKET:
            // don't eat, stop scanning a group
            break;

        case LEFTSQUAREBRACKET:
            return maybeMultiplied(tokenizer, readGroup(tokenizer));

        case LESSTHANSIGN:
            return tokenizer.nextCharCode() === APOSTROPHE
                ? readProperty(tokenizer)
                : readType(tokenizer);

        case VERTICALLINE:
            return {
                type: 'Combinator',
                value: tokenizer.substringToPos(
                    tokenizer.nextCharCode() === VERTICALLINE
                        ? tokenizer.pos + 2
                        : tokenizer.pos + 1
                )
            };

        case AMPERSAND:
            tokenizer.pos++;
            tokenizer.eat(AMPERSAND);

            return {
                type: 'Combinator',
                value: '&&'
            };

        case COMMA:
            tokenizer.pos++;
            return {
                type: 'Comma'
            };

        case APOSTROPHE:
            return maybeMultiplied(tokenizer, {
                type: 'String',
                value: scanString(tokenizer)
            });

        case SPACE:
        case TAB:
        case N:
        case R:
        case F:
            return {
                type: 'Spaces',
                value: scanSpaces(tokenizer)
            };

        case COMMERCIALAT:
            code = tokenizer.nextCharCode();

            if (code < 128 && NAME_CHAR[code] === 1) {
                tokenizer.pos++;
                return {
                    type: 'AtKeyword',
                    name: scanWord(tokenizer)
                };
            }

            return maybeToken(tokenizer);

        case ASTERISK:
        case PLUSSIGN:
        case QUESTIONMARK:
        case NUMBERSIGN:
        case EXCLAMATIONMARK:
            // prohibited tokens (used as a multiplier start)
            break;

        case LEFTCURLYBRACKET:
            // LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
            // check next char isn't a number, because it's likely a disjoined multiplier
            code = tokenizer.nextCharCode();

            if (code < 48 || code > 57) {
                return maybeToken(tokenizer);
            }

            break;

        default:
            return maybeToken(tokenizer);
    }
}
function parse(source) {
    var tokenizer = new Tokenizer(source);
    var result = readImplicitGroup(tokenizer);

    if (tokenizer.pos !== source.length) {
        tokenizer.error('Unexpected input');
    }

    // reduce redundant groups with single group term
    if (result.terms.length === 1 && result.terms[0].type === 'Group') {
        result = result.terms[0];
    }

    return result;
}
// warm up parse to eliminate code branches that never execute
// fix soft deoptimizations (insufficient type feedback)
parse('[a&&<b>#|<\'c\'>*||e() f{2} /,(% g#{1,2} h{2,})]!');

module.exports = parse;
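
// Usage sketch (illustrative; assumes this file is required directly as './parse'):
//
//   var parse = require('./parse');
//   var ast = parse('<length> | <percentage>');
//   // -> { type: 'Group', combinator: '|', disallowEmpty: false, explicit: false,
//   //      terms: [ { type: 'Type', name: 'length', opts: null },
//   //               { type: 'Type', name: 'percentage', opts: null } ] }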