match.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629
  1. var hasOwnProperty = Object.prototype.hasOwnProperty;
  2. var matchGraph = require('./match-graph');
  3. var MATCH = matchGraph.MATCH;
  4. var MISMATCH = matchGraph.MISMATCH;
  5. var DISALLOW_EMPTY = matchGraph.DISALLOW_EMPTY;
  6. var TYPE = require('../tokenizer/const').TYPE;
  7. var STUB = 0;
  8. var TOKEN = 1;
  9. var OPEN_SYNTAX = 2;
  10. var CLOSE_SYNTAX = 3;
  11. var EXIT_REASON_MATCH = 'Match';
  12. var EXIT_REASON_MISMATCH = 'Mismatch';
  13. var EXIT_REASON_ITERATION_LIMIT = 'Maximum iteration number exceeded (please fill an issue on https://github.com/csstree/csstree/issues)';
  14. var ITERATION_LIMIT = 15000;
  15. var totalIterationCount = 0;
  16. function reverseList(list) {
  17. var prev = null;
  18. var next = null;
  19. var item = list;
  20. while (item !== null) {
  21. next = item.prev;
  22. item.prev = prev;
  23. prev = item;
  24. item = next;
  25. }
  26. return prev;
  27. }
  28. function areStringsEqualCaseInsensitive(testStr, referenceStr) {
  29. if (testStr.length !== referenceStr.length) {
  30. return false;
  31. }
  32. for (var i = 0; i < testStr.length; i++) {
  33. var testCode = testStr.charCodeAt(i);
  34. var referenceCode = referenceStr.charCodeAt(i);
  35. // testCode.toLowerCase() for U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z).
  36. if (testCode >= 0x0041 && testCode <= 0x005A) {
  37. testCode = testCode | 32;
  38. }
  39. if (testCode !== referenceCode) {
  40. return false;
  41. }
  42. }
  43. return true;
  44. }
  45. function isCommaContextStart(token) {
  46. if (token === null) {
  47. return true;
  48. }
  49. return (
  50. token.type === TYPE.Comma ||
  51. token.type === TYPE.Function ||
  52. token.type === TYPE.LeftParenthesis ||
  53. token.type === TYPE.LeftSquareBracket ||
  54. token.type === TYPE.LeftCurlyBracket ||
  55. token.type === TYPE.Delim
  56. );
  57. }
  58. function isCommaContextEnd(token) {
  59. if (token === null) {
  60. return true;
  61. }
  62. return (
  63. token.type === TYPE.RightParenthesis ||
  64. token.type === TYPE.RightSquareBracket ||
  65. token.type === TYPE.RightCurlyBracket ||
  66. token.type === TYPE.Delim
  67. );
  68. }
  69. function internalMatch(tokens, state, syntaxes) {
  70. function moveToNextToken() {
  71. do {
  72. tokenIndex++;
  73. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  74. } while (token !== null && (token.type === TYPE.WhiteSpace || token.type === TYPE.Comment));
  75. }
  76. function getNextToken(offset) {
  77. var nextIndex = tokenIndex + offset;
  78. return nextIndex < tokens.length ? tokens[nextIndex] : null;
  79. }
  80. function stateSnapshotFromSyntax(nextState, prev) {
  81. return {
  82. nextState: nextState,
  83. matchStack: matchStack,
  84. syntaxStack: syntaxStack,
  85. thenStack: thenStack,
  86. tokenIndex: tokenIndex,
  87. prev: prev
  88. };
  89. }
  90. function pushThenStack(nextState) {
  91. thenStack = {
  92. nextState: nextState,
  93. matchStack: matchStack,
  94. syntaxStack: syntaxStack,
  95. prev: thenStack
  96. };
  97. }
  98. function pushElseStack(nextState) {
  99. elseStack = stateSnapshotFromSyntax(nextState, elseStack);
  100. }
  101. function addTokenToMatch() {
  102. matchStack = {
  103. type: TOKEN,
  104. syntax: state.syntax,
  105. token: token,
  106. prev: matchStack
  107. };
  108. moveToNextToken();
  109. syntaxStash = null;
  110. if (tokenIndex > longestMatch) {
  111. longestMatch = tokenIndex;
  112. }
  113. }
  114. function openSyntax() {
  115. syntaxStack = {
  116. syntax: state.syntax,
  117. opts: state.syntax.opts || (syntaxStack !== null && syntaxStack.opts) || null,
  118. prev: syntaxStack
  119. };
  120. matchStack = {
  121. type: OPEN_SYNTAX,
  122. syntax: state.syntax,
  123. token: matchStack.token,
  124. prev: matchStack
  125. };
  126. }
  127. function closeSyntax() {
  128. if (matchStack.type === OPEN_SYNTAX) {
  129. matchStack = matchStack.prev;
  130. } else {
  131. matchStack = {
  132. type: CLOSE_SYNTAX,
  133. syntax: syntaxStack.syntax,
  134. token: matchStack.token,
  135. prev: matchStack
  136. };
  137. }
  138. syntaxStack = syntaxStack.prev;
  139. }
  140. var syntaxStack = null;
  141. var thenStack = null;
  142. var elseStack = null;
  143. // null – stashing allowed, nothing stashed
  144. // false – stashing disabled, nothing stashed
  145. // anithing else – fail stashable syntaxes, some syntax stashed
  146. var syntaxStash = null;
  147. var iterationCount = 0; // count iterations and prevent infinite loop
  148. var exitReason = null;
  149. var token = null;
  150. var tokenIndex = -1;
  151. var longestMatch = 0;
  152. var matchStack = {
  153. type: STUB,
  154. syntax: null,
  155. token: null,
  156. prev: null
  157. };
  158. moveToNextToken();
  159. while (exitReason === null && ++iterationCount < ITERATION_LIMIT) {
  160. // function mapList(list, fn) {
  161. // var result = [];
  162. // while (list) {
  163. // result.unshift(fn(list));
  164. // list = list.prev;
  165. // }
  166. // return result;
  167. // }
  168. // console.log('--\n',
  169. // '#' + iterationCount,
  170. // require('util').inspect({
  171. // match: mapList(matchStack, x => x.type === TOKEN ? x.token && x.token.value : x.syntax ? ({ [OPEN_SYNTAX]: '<', [CLOSE_SYNTAX]: '</' }[x.type] || x.type) + '!' + x.syntax.name : null),
  172. // token: token && token.value,
  173. // tokenIndex,
  174. // syntax: syntax.type + (syntax.id ? ' #' + syntax.id : '')
  175. // }, { depth: null })
  176. // );
  177. switch (state.type) {
  178. case 'Match':
  179. if (thenStack === null) {
  180. // turn to MISMATCH when some tokens left unmatched
  181. if (token !== null) {
  182. // doesn't mismatch if just one token left and it's an IE hack
  183. if (tokenIndex !== tokens.length - 1 || (token.value !== '\\0' && token.value !== '\\9')) {
  184. state = MISMATCH;
  185. break;
  186. }
  187. }
  188. // break the main loop, return a result - MATCH
  189. exitReason = EXIT_REASON_MATCH;
  190. break;
  191. }
  192. // go to next syntax (`then` branch)
  193. state = thenStack.nextState;
  194. // check match is not empty
  195. if (state === DISALLOW_EMPTY) {
  196. if (thenStack.matchStack === matchStack) {
  197. state = MISMATCH;
  198. break;
  199. } else {
  200. state = MATCH;
  201. }
  202. }
  203. // close syntax if needed
  204. while (thenStack.syntaxStack !== syntaxStack) {
  205. closeSyntax();
  206. }
  207. // pop stack
  208. thenStack = thenStack.prev;
  209. break;
  210. case 'Mismatch':
  211. // when some syntax is stashed
  212. if (syntaxStash !== null && syntaxStash !== false) {
  213. // there is no else branches or a branch reduce match stack
  214. if (elseStack === null || tokenIndex > elseStack.tokenIndex) {
  215. // restore state from the stash
  216. elseStack = syntaxStash;
  217. syntaxStash = false; // disable stashing
  218. }
  219. } else if (elseStack === null) {
  220. // no else branches -> break the main loop
  221. // return a result - MISMATCH
  222. exitReason = EXIT_REASON_MISMATCH;
  223. break;
  224. }
  225. // go to next syntax (`else` branch)
  226. state = elseStack.nextState;
  227. // restore all the rest stack states
  228. thenStack = elseStack.thenStack;
  229. syntaxStack = elseStack.syntaxStack;
  230. matchStack = elseStack.matchStack;
  231. tokenIndex = elseStack.tokenIndex;
  232. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  233. // pop stack
  234. elseStack = elseStack.prev;
  235. break;
  236. case 'MatchGraph':
  237. state = state.match;
  238. break;
  239. case 'If':
  240. // IMPORTANT: else stack push must go first,
  241. // since it stores the state of thenStack before changes
  242. if (state.else !== MISMATCH) {
  243. pushElseStack(state.else);
  244. }
  245. if (state.then !== MATCH) {
  246. pushThenStack(state.then);
  247. }
  248. state = state.match;
  249. break;
  250. case 'MatchOnce':
  251. state = {
  252. type: 'MatchOnceBuffer',
  253. syntax: state,
  254. index: 0,
  255. mask: 0
  256. };
  257. break;
  258. case 'MatchOnceBuffer':
  259. var terms = state.syntax.terms;
  260. if (state.index === terms.length) {
  261. // no matches at all or it's required all terms to be matched
  262. if (state.mask === 0 || state.syntax.all) {
  263. state = MISMATCH;
  264. break;
  265. }
  266. // a partial match is ok
  267. state = MATCH;
  268. break;
  269. }
  270. // all terms are matched
  271. if (state.mask === (1 << terms.length) - 1) {
  272. state = MATCH;
  273. break;
  274. }
  275. for (; state.index < terms.length; state.index++) {
  276. var matchFlag = 1 << state.index;
  277. if ((state.mask & matchFlag) === 0) {
  278. // IMPORTANT: else stack push must go first,
  279. // since it stores the state of thenStack before changes
  280. pushElseStack(state);
  281. pushThenStack({
  282. type: 'AddMatchOnce',
  283. syntax: state.syntax,
  284. mask: state.mask | matchFlag
  285. });
  286. // match
  287. state = terms[state.index++];
  288. break;
  289. }
  290. }
  291. break;
  292. case 'AddMatchOnce':
  293. state = {
  294. type: 'MatchOnceBuffer',
  295. syntax: state.syntax,
  296. index: 0,
  297. mask: state.mask
  298. };
  299. break;
  300. case 'Enum':
  301. if (token !== null) {
  302. var name = token.value.toLowerCase();
  303. // drop \0 and \9 hack from keyword name
  304. if (name.indexOf('\\') !== -1) {
  305. name = name.replace(/\\[09].*$/, '');
  306. }
  307. if (hasOwnProperty.call(state.map, name)) {
  308. state = state.map[name];
  309. break;
  310. }
  311. }
  312. state = MISMATCH;
  313. break;
  314. case 'Generic':
  315. var opts = syntaxStack !== null ? syntaxStack.opts : null;
  316. var lastTokenIndex = tokenIndex + Math.floor(state.fn(token, getNextToken, opts));
  317. if (!isNaN(lastTokenIndex) && lastTokenIndex > tokenIndex) {
  318. while (tokenIndex < lastTokenIndex) {
  319. addTokenToMatch();
  320. }
  321. state = MATCH;
  322. } else {
  323. state = MISMATCH;
  324. }
  325. break;
  326. case 'Type':
  327. case 'Property':
  328. var syntaxDict = state.type === 'Type' ? 'types' : 'properties';
  329. var dictSyntax = hasOwnProperty.call(syntaxes, syntaxDict) ? syntaxes[syntaxDict][state.name] : null;
  330. if (!dictSyntax || !dictSyntax.match) {
  331. throw new Error(
  332. 'Bad syntax reference: ' +
  333. (state.type === 'Type'
  334. ? '<' + state.name + '>'
  335. : '<\'' + state.name + '\'>')
  336. );
  337. }
  338. // stash a syntax for types with low priority
  339. if (syntaxStash !== false && token !== null && state.type === 'Type') {
  340. var lowPriorityMatching =
  341. // https://drafts.csswg.org/css-values-4/#custom-idents
  342. // When parsing positionally-ambiguous keywords in a property value, a <custom-ident> production
  343. // can only claim the keyword if no other unfulfilled production can claim it.
  344. (state.name === 'custom-ident' && token.type === TYPE.Ident) ||
  345. // https://drafts.csswg.org/css-values-4/#lengths
  346. // ... if a `0` could be parsed as either a <number> or a <length> in a property (such as line-height),
  347. // it must parse as a <number>
  348. (state.name === 'length' && token.value === '0');
  349. if (lowPriorityMatching) {
  350. if (syntaxStash === null) {
  351. syntaxStash = stateSnapshotFromSyntax(state, elseStack);
  352. }
  353. state = MISMATCH;
  354. break;
  355. }
  356. }
  357. openSyntax();
  358. state = dictSyntax.match;
  359. break;
  360. case 'Keyword':
  361. var name = state.name;
  362. if (token !== null) {
  363. var keywordName = token.value;
  364. // drop \0 and \9 hack from keyword name
  365. if (keywordName.indexOf('\\') !== -1) {
  366. keywordName = keywordName.replace(/\\[09].*$/, '');
  367. }
  368. if (areStringsEqualCaseInsensitive(keywordName, name)) {
  369. addTokenToMatch();
  370. state = MATCH;
  371. break;
  372. }
  373. }
  374. state = MISMATCH;
  375. break;
  376. case 'AtKeyword':
  377. case 'Function':
  378. if (token !== null && areStringsEqualCaseInsensitive(token.value, state.name)) {
  379. addTokenToMatch();
  380. state = MATCH;
  381. break;
  382. }
  383. state = MISMATCH;
  384. break;
  385. case 'Token':
  386. if (token !== null && token.value === state.value) {
  387. addTokenToMatch();
  388. state = MATCH;
  389. break;
  390. }
  391. state = MISMATCH;
  392. break;
  393. case 'Comma':
  394. if (token !== null && token.type === TYPE.Comma) {
  395. if (isCommaContextStart(matchStack.token)) {
  396. state = MISMATCH;
  397. } else {
  398. addTokenToMatch();
  399. state = isCommaContextEnd(token) ? MISMATCH : MATCH;
  400. }
  401. } else {
  402. state = isCommaContextStart(matchStack.token) || isCommaContextEnd(token) ? MATCH : MISMATCH;
  403. }
  404. break;
  405. case 'String':
  406. var string = '';
  407. for (var lastTokenIndex = tokenIndex; lastTokenIndex < tokens.length && string.length < state.value.length; lastTokenIndex++) {
  408. string += tokens[lastTokenIndex].value;
  409. }
  410. if (areStringsEqualCaseInsensitive(string, state.value)) {
  411. while (tokenIndex < lastTokenIndex) {
  412. addTokenToMatch();
  413. }
  414. state = MATCH;
  415. } else {
  416. state = MISMATCH;
  417. }
  418. break;
  419. default:
  420. throw new Error('Unknown node type: ' + state.type);
  421. }
  422. }
  423. totalIterationCount += iterationCount;
  424. switch (exitReason) {
  425. case null:
  426. console.warn('[csstree-match] BREAK after ' + ITERATION_LIMIT + ' iterations');
  427. exitReason = EXIT_REASON_ITERATION_LIMIT;
  428. matchStack = null;
  429. break;
  430. case EXIT_REASON_MATCH:
  431. while (syntaxStack !== null) {
  432. closeSyntax();
  433. }
  434. break;
  435. default:
  436. matchStack = null;
  437. }
  438. return {
  439. tokens: tokens,
  440. reason: exitReason,
  441. iterations: iterationCount,
  442. match: matchStack,
  443. longestMatch: longestMatch
  444. };
  445. }
  446. function matchAsList(tokens, matchGraph, syntaxes) {
  447. var matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  448. if (matchResult.match !== null) {
  449. var item = reverseList(matchResult.match).prev;
  450. matchResult.match = [];
  451. while (item !== null) {
  452. switch (item.type) {
  453. case STUB:
  454. break;
  455. case OPEN_SYNTAX:
  456. case CLOSE_SYNTAX:
  457. matchResult.match.push({
  458. type: item.type,
  459. syntax: item.syntax
  460. });
  461. break;
  462. default:
  463. matchResult.match.push({
  464. token: item.token.value,
  465. node: item.token.node
  466. });
  467. break;
  468. }
  469. item = item.prev;
  470. }
  471. }
  472. return matchResult;
  473. }
  474. function matchAsTree(tokens, matchGraph, syntaxes) {
  475. var matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  476. if (matchResult.match === null) {
  477. return matchResult;
  478. }
  479. var item = matchResult.match;
  480. var host = matchResult.match = {
  481. syntax: matchGraph.syntax || null,
  482. match: []
  483. };
  484. var hostStack = [host];
  485. // revert a list and start with 2nd item since 1st is a stub item
  486. item = reverseList(item).prev;
  487. // build a tree
  488. while (item !== null) {
  489. switch (item.type) {
  490. case OPEN_SYNTAX:
  491. host.match.push(host = {
  492. syntax: item.syntax,
  493. match: []
  494. });
  495. hostStack.push(host);
  496. break;
  497. case CLOSE_SYNTAX:
  498. hostStack.pop();
  499. host = hostStack[hostStack.length - 1];
  500. break;
  501. default:
  502. host.match.push({
  503. syntax: item.syntax || null,
  504. token: item.token.value,
  505. node: item.token.node
  506. });
  507. }
  508. item = item.prev;
  509. }
  510. return matchResult;
  511. }
  512. module.exports = {
  513. matchAsList: matchAsList,
  514. matchAsTree: matchAsTree,
  515. getTotalIterationCount: function() {
  516. return totalIterationCount;
  517. }
  518. };