123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568 |
- var Tokenizer = require('./tokenizer');
// Character codes the parser compares against. The values are UTF-16 code units,
// i.e. what String.prototype.charCodeAt() returns.

// whitespace
var SPACE = 32;
var TAB = 9;
var N = 10; // \n
var R = 13; // \r
var F = 12; // \f

// multiplier markers
var ASTERISK = 42; // *
var PLUSSIGN = 43; // +
var QUESTIONMARK = 63; // ?
var NUMBERSIGN = 35; // #
var EXCLAMATIONMARK = 33; // !

// brackets
var LEFTPARENTHESIS = 40; // (
var RIGHTPARENTHESIS = 41; // )
var LESSTHANSIGN = 60; // <
var GREATERTHANSIGN = 62; // >
var LEFTSQUAREBRACKET = 91; // [
var RIGHTSQUAREBRACKET = 93; // ]
var LEFTCURLYBRACKET = 123; // {
var RIGHTCURLYBRACKET = 125; // }

// combinator and miscellaneous punctuation
var AMPERSAND = 38; // &
var VERTICALLINE = 124; // |
var COMMA = 44; // ,
var APOSTROPHE = 39; // '
var HYPERMINUS = 45; // -
var COMMERCIALAT = 64; // @
var INFINITY = 8734; // ∞
/**
 * Builds a lookup table for the first 128 character codes.
 *
 * @param {function(string): *} fn Predicate called with the one-character string
 *     for every code from 0 to 127.
 * @returns {Uint32Array|Array<number>} Table holding 1 at the indexes whose
 *     character made `fn` return a truthy value and 0 elsewhere. A plain Array is
 *     used when Uint32Array is not available.
 */
function createCharMap(fn) {
    var size = 128;
    var table;

    if (typeof Uint32Array === 'function') {
        table = new Uint32Array(size);
    } else {
        table = new Array(size);
    }

    for (var code = 0; code < size; code++) {
        var accepted = fn(String.fromCharCode(code));

        table[code] = accepted ? 1 : 0;
    }

    return table;
}

// 1 for the characters that may appear in a name: ASCII letters, digits and the hyphen
var NAME_CHAR = createCharMap(function(ch) {
    return /[a-zA-Z0-9\-]/.test(ch);
});

// Sort keys used by regroupTerms(): combinators with a smaller number are grouped first
var COMBINATOR_PRECEDENCE = {
    ' ': 1,
    '&&': 2,
    '||': 3,
    '|': 4
};
/**
 * Consumes the whitespace run starting at the current position.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} What `tokenizer.substringToPos()` returns for the end of the
 *     whitespace run reported by `tokenizer.findWsEnd()`.
 */
function scanSpaces(tokenizer) {
    var whitespaceEnd = tokenizer.findWsEnd(tokenizer.pos);

    return tokenizer.substringToPos(whitespaceEnd);
}
/**
 * Consumes the longest run of name characters (see NAME_CHAR) starting at the
 * current position.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} The consumed word, as returned by `tokenizer.substringToPos()`.
 *     When no name character is found, `tokenizer.error('Expect a keyword')` is
 *     called first.
 */
function scanWord(tokenizer) {
    var source = tokenizer.str;
    var wordEnd = tokenizer.pos;

    while (wordEnd < source.length) {
        var code = source.charCodeAt(wordEnd);

        // codes outside the table and codes flagged 0 both terminate the word
        if (code >= 128 || NAME_CHAR[code] === 0) {
            break;
        }

        wordEnd++;
    }

    if (wordEnd === tokenizer.pos) {
        tokenizer.error('Expect a keyword');
    }

    return tokenizer.substringToPos(wordEnd);
}
/**
 * Consumes the run of ASCII digits starting at the current position.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} The consumed digits, as returned by `tokenizer.substringToPos()`.
 *     When the current character is not a digit, `tokenizer.error('Expect a number')`
 *     is called first.
 */
function scanNumber(tokenizer) {
    var source = tokenizer.str;
    var digitsEnd = tokenizer.pos;

    while (digitsEnd < source.length) {
        var code = source.charCodeAt(digitsEnd);
        var isDigit = code >= 48 && code <= 57; // '0'..'9'

        if (!isDigit) {
            break;
        }

        digitsEnd++;
    }

    if (digitsEnd === tokenizer.pos) {
        tokenizer.error('Expect a number');
    }

    return tokenizer.substringToPos(digitsEnd);
}
/**
 * Consumes an apostrophe-delimited string. The character at the current position
 * is taken to be the opening apostrophe; the search for the closing one starts at
 * the next character.
 *
 * @param {Tokenizer} tokenizer
 * @returns {string} The text up to and including the closing apostrophe, as
 *     returned by `tokenizer.substringToPos()`. When there is no closing apostrophe
 *     the position is moved to the end of the input and
 *     `tokenizer.error('Expect an apostrophe')` is called.
 */
function scanString(tokenizer) {
    var source = tokenizer.str;
    var closingQuote = source.indexOf('\'', tokenizer.pos + 1);

    if (closingQuote < 0) {
        tokenizer.pos = source.length;
        tokenizer.error('Expect an apostrophe');
    }

    return tokenizer.substringToPos(closingQuote + 1);
}
/**
 * Reads a curly-bracket repetition range: `{A}`, `{A,B}` or `{A,}`.
 *
 * @param {Tokenizer} tokenizer Positioned at the opening `{`.
 * @returns {{min: number, max: number}} `{A}` gives min = max = A, `{A,B}` gives
 *     min = A and max = B, `{A,}` gives max = 0. A max that converts to 0 is
 *     reported as 0 as well.
 */
function readMultiplierRange(tokenizer) {
    tokenizer.eat(LEFTCURLYBRACKET);

    var lower = scanNumber(tokenizer);
    var upper = lower; // `{A}`: both bounds are the same number

    if (tokenizer.charCode() === COMMA) {
        tokenizer.pos++;

        // `{A,}` has nothing between the comma and the closing bracket
        upper = tokenizer.charCode() !== RIGHTCURLYBRACKET
            ? scanNumber(tokenizer)
            : null;
    }

    tokenizer.eat(RIGHTCURLYBRACKET);

    return {
        min: Number(lower),
        max: upper ? Number(upper) : 0
    };
}
/**
 * Reads an optional multiplier at the current position: `*`, `+`, `?`, `#`,
 * `#{A,B}` or `{A,B}`.
 *
 * @param {Tokenizer} tokenizer
 * @returns {?{type: string, comma: boolean, min: number, max: number, term: null}}
 *     A Multiplier node whose `term` is left for the caller to fill in, or null
 *     (nothing consumed) when the current character does not start a multiplier.
 *     `comma` is true only for the `#` forms.
 */
function readMultiplier(tokenizer) {
    var code = tokenizer.charCode();
    var comma = false;
    var range;

    if (code === ASTERISK) {
        tokenizer.pos++;
        range = { min: 0, max: 0 };
    } else if (code === PLUSSIGN) {
        tokenizer.pos++;
        range = { min: 1, max: 0 };
    } else if (code === QUESTIONMARK) {
        tokenizer.pos++;
        range = { min: 0, max: 1 };
    } else if (code === NUMBERSIGN) {
        tokenizer.pos++;
        comma = true;

        // `#` may be followed directly by an explicit range: `#{A,B}`
        range = tokenizer.charCode() === LEFTCURLYBRACKET
            ? readMultiplierRange(tokenizer)
            : { min: 1, max: 0 };
    } else if (code === LEFTCURLYBRACKET) {
        range = readMultiplierRange(tokenizer);
    } else {
        return null;
    }

    return {
        type: 'Multiplier',
        comma: comma,
        min: range.min,
        max: range.max,
        term: null
    };
}
/**
 * Wraps `node` into a Multiplier node when a multiplier follows it in the input.
 *
 * @param {Tokenizer} tokenizer
 * @param {Object} node The term that has just been read.
 * @returns {Object} The Multiplier node produced by readMultiplier() with `node`
 *     stored in its `term`, or `node` itself when readMultiplier() returns null.
 */
function maybeMultiplied(tokenizer, node) {
    var wrapper = readMultiplier(tokenizer);

    if (wrapper === null) {
        return node;
    }

    wrapper.term = node;

    return wrapper;
}
/**
 * Turns the value returned by `tokenizer.peek()` into a Token node.
 *
 * @param {Tokenizer} tokenizer
 * @returns {?{type: string, value: string}} A Token node, or null when
 *     `tokenizer.peek()` returns an empty string.
 */
function maybeToken(tokenizer) {
    var value = tokenizer.peek();

    return value === ''
        ? null
        : { type: 'Token', value: value };
}
/**
 * Reads a property reference written as `<'name'>`, followed by an optional
 * multiplier.
 *
 * @param {Tokenizer} tokenizer Positioned at the opening `<`.
 * @returns {Object} A `{type: 'Property', name}` node, passed through
 *     maybeMultiplied().
 */
function readProperty(tokenizer) {
    tokenizer.eat(LESSTHANSIGN);
    tokenizer.eat(APOSTROPHE);

    var property = {
        type: 'Property',
        name: scanWord(tokenizer)
    };

    tokenizer.eat(APOSTROPHE);
    tokenizer.eat(GREATERTHANSIGN);

    return maybeMultiplied(tokenizer, property);
}
// https://drafts.csswg.org/css-values-3/#numeric-ranges
// 4.1. Range Restrictions and Range Definition Notation
//
// Range restrictions can be annotated in the numeric type notation using CSS bracketed
// range notation—[min,max]—within the angle brackets, after the identifying keyword,
// indicating a closed range between (and including) min and max.
// For example, <integer [0, 10]> indicates an integer between 0 and 10, inclusive.

/**
 * Reads a bracketed range such as `[0,10]`, `[-∞,0]` or `[1,∞]`.
 *
 * @param {Tokenizer} tokenizer Positioned at the opening `[`.
 * @returns {?{type: string, min: ?number, max: ?number}} A Range node in which an
 *     infinite bound is stored as null (this keeps the AST JSON serializable), or
 *     null when both bounds are infinite.
 */
function readTypeRange(tokenizer) {
    var min = null; // null stands for -Infinity
    var max = null; // null stands for Infinity
    var negative;

    tokenizer.eat(LEFTSQUAREBRACKET);

    // lower bound: `-∞` or an optionally negated number
    negative = tokenizer.charCode() === HYPERMINUS;
    if (negative) {
        tokenizer.peek(); // step over the minus sign
    }

    if (negative && tokenizer.charCode() === INFINITY) {
        tokenizer.peek(); // step over the infinity sign
    } else {
        min = (negative ? -1 : 1) * Number(scanNumber(tokenizer));
    }

    scanSpaces(tokenizer);
    tokenizer.eat(COMMA);
    scanSpaces(tokenizer);

    // upper bound: `∞` or an optionally negated number
    if (tokenizer.charCode() === INFINITY) {
        tokenizer.peek();
    } else {
        negative = tokenizer.charCode() === HYPERMINUS;
        if (negative) {
            tokenizer.peek();
        }

        max = (negative ? -1 : 1) * Number(scanNumber(tokenizer));
    }

    tokenizer.eat(RIGHTSQUAREBRACKET);

    // If no range is indicated, either by using the bracketed range notation
    // or in the property description, then [−∞,∞] is assumed.
    if (min === null && max === null) {
        return null;
    }

    return {
        type: 'Range',
        min: min,
        max: max
    };
}
/**
 * Reads a type reference: `<name>`, `<name()>` or `<name [min,max]>`, followed by
 * an optional multiplier.
 *
 * @param {Tokenizer} tokenizer Positioned at the opening `<`.
 * @returns {Object} A `{type: 'Type', name, opts}` node passed through
 *     maybeMultiplied(). `name` keeps a trailing `()` when present; `opts` is the
 *     result of readTypeRange() when a bracketed range follows the name, null
 *     otherwise.
 */
function readType(tokenizer) {
    tokenizer.eat(LESSTHANSIGN);

    var name = scanWord(tokenizer);
    var hasEmptyParens =
        tokenizer.charCode() === LEFTPARENTHESIS &&
        tokenizer.nextCharCode() === RIGHTPARENTHESIS;

    if (hasEmptyParens) {
        tokenizer.pos += 2;
        name += '()';
    }

    var opts = null;
    var afterSpaces = tokenizer.findWsEnd(tokenizer.pos);

    // whitespace is consumed only when a range really follows it
    if (tokenizer.charCodeAt(afterSpaces) === LEFTSQUAREBRACKET) {
        scanSpaces(tokenizer);
        opts = readTypeRange(tokenizer);
    }

    tokenizer.eat(GREATERTHANSIGN);

    return maybeMultiplied(tokenizer, {
        type: 'Type',
        name: name,
        opts: opts
    });
}
/**
 * Reads a word and classifies it by the character that follows.
 *
 * @param {Tokenizer} tokenizer Positioned at the first character of the word.
 * @returns {Object} `{type: 'Function', name}` when the word is immediately
 *     followed by `(` (the parenthesis is consumed and no multiplier is read);
 *     otherwise `{type: 'Keyword', name}` passed through maybeMultiplied().
 */
function readKeywordOrFunction(tokenizer) {
    var name = scanWord(tokenizer);

    if (tokenizer.charCode() !== LEFTPARENTHESIS) {
        return maybeMultiplied(tokenizer, {
            type: 'Keyword',
            name: name
        });
    }

    tokenizer.pos++; // consume `(`

    return {
        type: 'Function',
        name: name
    };
}
/**
 * Folds a flat list of terms separated by Combinator nodes into nested groups.
 *
 * The combinators are processed in COMBINATOR_PRECEDENCE order. For every
 * combinator except the last one, each run of terms joined by it is replaced by a
 * Group node. For the last one its Combinator nodes are just removed, leaving the
 * remaining terms for the caller's own group. `terms` is modified in place.
 *
 * @param {Array<Object>} terms Terms interleaved with `{type: 'Combinator', value}`
 *     nodes.
 * @param {Object} combinators Object whose keys are the combinators used in `terms`.
 * @returns {string|undefined} The combinator processed last, or undefined when
 *     `combinators` has no keys.
 */
function regroupTerms(terms, combinators) {
    var ordered = Object.keys(combinators).sort(function(left, right) {
        return COMBINATOR_PRECEDENCE[left] - COMBINATOR_PRECEDENCE[right];
    });
    var current; // stays undefined when there is nothing to process

    // replaces terms[from..to) with a single implicit group joined by `combinator`
    function wrapRun(from, to, combinator) {
        terms.splice(from, to - from, {
            type: 'Group',
            terms: terms.slice(from, to),
            combinator: combinator,
            disallowEmpty: false,
            explicit: false
        });
    }

    for (var k = 0; k < ordered.length; k++) {
        var isLast = k === ordered.length - 1;
        var runStart = 0; // index where the current run begins, -1 when outside a run
        var i = 0;

        current = ordered[k];

        while (i < terms.length) {
            var term = terms[i];

            if (term.type !== 'Combinator') {
                i++;
                continue;
            }

            if (term.value === current) {
                if (runStart === -1) {
                    runStart = i - 1;
                }

                // drop the combinator; the following term slides into index i
                terms.splice(i, 1);
                continue;
            }

            // a different combinator closes the run collected so far
            if (runStart !== -1 && i - runStart > 1) {
                wrapRun(runStart, i, current);
                i = runStart + 1; // index of the combinator that closed the run
            }

            runStart = -1;
            i++;
        }

        // a run reaching the end of the list is wrapped too, except on the last pass
        if (runStart !== -1 && !isLast) {
            wrapRun(runStart, i, current);
        }
    }

    return current;
}
/**
 * Reads terms until peek() stops producing tokens (end of input, a closing `]`
 * or a character that cannot start a term) and combines them into an implicit
 * group.
 *
 * Spaces tokens are dropped. Two adjacent terms with no explicit combinator
 * between them get a juxtaposition Combinator node (value ' ') inserted. The
 * collected list is then nested by regroupTerms().
 *
 * An "Unexpected combinator" error is reported through `tokenizer.error()` when a
 * combinator opens the group, directly follows another combinator, or closes the
 * group.
 *
 * @param {Tokenizer} tokenizer
 * @returns {{type: string, terms: Array<Object>, combinator: string,
 *     disallowEmpty: boolean, explicit: boolean}} Group node; `combinator` is the
 *     value returned by regroupTerms(), or ' ' when that is falsy.
 */
function readImplicitGroup(tokenizer) {
    var terms = [];
    var combinators = {};
    var token;
    var prevToken = null;
    var prevTokenPos = tokenizer.pos; // offset right after the last non-space token

    while ((token = peek(tokenizer))) {
        if (token.type !== 'Spaces') {
            if (token.type === 'Combinator') {
                // check for combinator in group beginning and double combinator sequence
                if (prevToken === null || prevToken.type === 'Combinator') {
                    tokenizer.pos = prevTokenPos;
                    tokenizer.error('Unexpected combinator');
                }

                combinators[token.value] = true;
            } else if (prevToken !== null && prevToken.type !== 'Combinator') {
                combinators[' '] = true; // a b
                terms.push({
                    type: 'Combinator',
                    value: ' '
                });
            }

            terms.push(token);
            prevToken = token;
            prevTokenPos = tokenizer.pos;
        }
    }

    // check for combinator in group ending
    if (prevToken !== null && prevToken.type === 'Combinator') {
        // prevTokenPos is the offset just past the dangling combinator; step back
        // over its text so that the error is reported at the combinator itself
        // (subtracting prevTokenPos from the position gave an unrelated offset)
        tokenizer.pos = prevTokenPos - prevToken.value.length;
        tokenizer.error('Unexpected combinator');
    }

    return {
        type: 'Group',
        terms: terms,
        combinator: regroupTerms(terms, combinators) || ' ',
        disallowEmpty: false,
        explicit: false
    };
}
/**
 * Reads an explicit group: `[ ... ]`, optionally followed by `!`.
 *
 * @param {Tokenizer} tokenizer Positioned at the opening `[`.
 * @returns {Object} The group produced by readImplicitGroup() with `explicit` set
 *     to true, and `disallowEmpty` set to true when a `!` directly follows the
 *     closing bracket.
 */
function readGroup(tokenizer) {
    tokenizer.eat(LEFTSQUAREBRACKET);

    var group = readImplicitGroup(tokenizer);

    tokenizer.eat(RIGHTSQUAREBRACKET);
    group.explicit = true;

    var hasBang = tokenizer.charCode() === EXCLAMATIONMARK;

    if (hasBang) {
        tokenizer.pos++;
        group.disallowEmpty = true;
    }

    return group;
}
/**
 * Reads the next token or term at the current position, dispatching on the
 * current character code.
 *
 * @param {Tokenizer} tokenizer
 * @returns {Object|null|undefined} The node that was read. undefined is returned,
 *     with nothing consumed, for a closing `]`, for a character that may only
 *     appear as a multiplier start (`*`, `+`, `?`, `#`, `!`) and for `{` followed
 *     by a digit. null is what maybeToken() yields when the tokenizer has no more
 *     characters.
 */
function peek(tokenizer) {
    var code = tokenizer.charCode();
    var next;

    // a name character starts a keyword or a function
    if (code < 128 && NAME_CHAR[code] === 1) {
        return readKeywordOrFunction(tokenizer);
    }

    switch (code) {
        case SPACE:
        case TAB:
        case N:
        case R:
        case F:
            return {
                type: 'Spaces',
                value: scanSpaces(tokenizer)
            };

        case COMMA:
            tokenizer.pos++;
            return {
                type: 'Comma'
            };

        case VERTICALLINE:
            // `||` when the bar is doubled, `|` otherwise
            next = tokenizer.nextCharCode() === VERTICALLINE ? 2 : 1;
            return {
                type: 'Combinator',
                value: tokenizer.substringToPos(tokenizer.pos + next)
            };

        case AMPERSAND:
            // a single `&` is not valid, the second one is mandatory
            tokenizer.pos++;
            tokenizer.eat(AMPERSAND);
            return {
                type: 'Combinator',
                value: '&&'
            };

        case LEFTSQUAREBRACKET:
            return maybeMultiplied(tokenizer, readGroup(tokenizer));

        case LESSTHANSIGN:
            if (tokenizer.nextCharCode() === APOSTROPHE) {
                return readProperty(tokenizer);
            }
            return readType(tokenizer);

        case APOSTROPHE:
            return maybeMultiplied(tokenizer, {
                type: 'String',
                value: scanString(tokenizer)
            });

        case COMMERCIALAT:
            next = tokenizer.nextCharCode();
            if (next < 128 && NAME_CHAR[next] === 1) {
                tokenizer.pos++; // the `@` is not part of the name
                return {
                    type: 'AtKeyword',
                    name: scanWord(tokenizer)
                };
            }
            return maybeToken(tokenizer);

        case LEFTCURLYBRACKET:
            // LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
            // check next char isn't a number, because it's likely a disjoined multiplier
            next = tokenizer.nextCharCode();
            if (next < 48 || next > 57) {
                return maybeToken(tokenizer);
            }
            return undefined;

        case RIGHTSQUAREBRACKET:
            // don't eat, stop scan a group
            return undefined;

        case ASTERISK:
        case PLUSSIGN:
        case QUESTIONMARK:
        case NUMBERSIGN:
        case EXCLAMATIONMARK:
            // prohibited tokens (used as a multiplier start)
            return undefined;

        default:
            return maybeToken(tokenizer);
    }
}
/**
 * Parses a definition syntax string into its AST.
 *
 * @param {string} source Syntax text.
 * @returns {Object} The root Group node. When the implicit top-level group holds
 *     exactly one term and that term is a Group, that inner group is returned
 *     instead. `tokenizer.error('Unexpected input')` is called when reading stops
 *     before the end of `source`.
 */
function parse(source) {
    var tokenizer = new Tokenizer(source);
    var root = readImplicitGroup(tokenizer);

    if (tokenizer.pos !== source.length) {
        tokenizer.error('Unexpected input');
    }

    // reduce redundant groups with single group term
    var terms = root.terms;

    if (terms.length === 1 && terms[0].type === 'Group') {
        return terms[0];
    }

    return root;
}
// Warm-up: one parse() call is made at module load, on a sample that mixes
// combinators, types, properties, functions, multipliers and plain tokens.
// It is meant to eliminate code branches that never execute and to fix soft
// deoptimizations (insufficient type feedback).
var WARM_UP_SYNTAX = '[a&&<b>#|<\'c\'>*||e() f{2} /,(% g#{1,2} h{2,})]!';

parse(WARM_UP_SYNTAX);

module.exports = parse;
|