123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249 |
- var EOF = 0; // End-of-input marker: isValidEscape() rejects an escape whose second code point equals this value.
- // https://drafts.csswg.org/css-syntax-3/
- // § 4.2. Definitions
/**
 * digit
 * A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` lies in 0x30..0x39 (both ends included).
 */
function isDigit(code) {
    var DIGIT_ZERO = 0x0030;
    var DIGIT_NINE = 0x0039;

    return DIGIT_ZERO <= code && code <= DIGIT_NINE;
}
/**
 * hex digit
 * A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and
 * U+0046 LATIN CAPITAL LETTER F (F), or a code point between
 * U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` for 0-9, A-F and a-f.
 */
function isHexDigit(code) {
    // 0 .. 9
    if (isDigit(code)) {
        return true;
    }

    // A .. F
    if (code >= 0x0041 && code <= 0x0046) {
        return true;
    }

    // a .. f
    return code >= 0x0061 && code <= 0x0066;
}
/**
 * uppercase letter
 * A code point between U+0041 LATIN CAPITAL LETTER A (A) and
 * U+005A LATIN CAPITAL LETTER Z (Z).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` lies in 0x41..0x5A (both ends included).
 */
function isUppercaseLetter(code) {
    var LATIN_CAPITAL_A = 0x0041;
    var LATIN_CAPITAL_Z = 0x005A;

    return LATIN_CAPITAL_A <= code && code <= LATIN_CAPITAL_Z;
}
/**
 * lowercase letter
 * A code point between U+0061 LATIN SMALL LETTER A (a) and
 * U+007A LATIN SMALL LETTER Z (z).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` lies in 0x61..0x7A (both ends included).
 */
function isLowercaseLetter(code) {
    // Anything below "a" (or not comparable at all) is rejected up front.
    if (code >= 0x0061) {
        return code <= 0x007A;
    }

    return false;
}
/**
 * letter
 * An uppercase letter or a lowercase letter.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` for A-Z and a-z.
 */
function isLetter(code) {
    if (isUppercaseLetter(code)) {
        return true;
    }

    return isLowercaseLetter(code);
}
/**
 * non-ASCII code point
 * A code point with a value equal to or greater than U+0080 <control>.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` is 0x80 or above.
 */
function isNonAscii(code) {
    var FIRST_NON_ASCII = 0x0080;

    return FIRST_NON_ASCII <= code;
}
/**
 * name-start code point
 * A letter, a non-ASCII code point, or U+005F LOW LINE (_).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` may begin a name.
 */
function isNameStart(code) {
    // U+005F LOW LINE (_)
    if (code === 0x005F) {
        return true;
    }

    return isLetter(code) || isNonAscii(code);
}
/**
 * name code point
 * A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` may appear inside a name.
 */
function isName(code) {
    // U+002D HYPHEN-MINUS (-)
    if (code === 0x002D) {
        return true;
    }

    return isNameStart(code) || isDigit(code);
}
/**
 * non-printable code point
 * A code point between U+0000 NULL and U+0008 BACKSPACE, or
 * U+000B LINE TABULATION, or a code point between U+000E SHIFT OUT and
 * U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` is one of the code points listed above.
 */
function isNonPrintable(code) {
    // Single code points: LINE TABULATION and DELETE
    if (code === 0x000B || code === 0x007F) {
        return true;
    }

    // NULL .. BACKSPACE
    if (code >= 0x0000 && code <= 0x0008) {
        return true;
    }

    // SHIFT OUT .. INFORMATION SEPARATOR ONE
    return code >= 0x000E && code <= 0x001F;
}
/**
 * newline
 * U+000A LINE FEED. The spec definition leaves out U+000D CARRIAGE RETURN and
 * U+000C FORM FEED because they are converted to U+000A LINE FEED during
 * preprocessing.
 * TODO: no preprocessing is done here, so U+000D CARRIAGE RETURN and
 * U+000C FORM FEED are checked for as well.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` for LINE FEED, CARRIAGE RETURN and FORM FEED.
 */
function isNewline(code) {
    switch (code) {
        case 0x000A: // LINE FEED
        case 0x000D: // CARRIAGE RETURN
        case 0x000C: // FORM FEED
            return true;

        default:
            return false;
    }
}
/**
 * whitespace
 * A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` for space, tab and every code point accepted by isNewline().
 */
function isWhiteSpace(code) {
    // SPACE or CHARACTER TABULATION
    if (code === 0x0020 || code === 0x0009) {
        return true;
    }

    return isNewline(code);
}
/**
 * § 4.3.8. Check if two code points are a valid escape
 *
 * The pair is a valid escape when the first code point is
 * U+005C REVERSE SOLIDUS (\) and the second one is neither a newline nor EOF.
 *
 * @param {number} first  First code point of the pair.
 * @param {number} second Second code point of the pair.
 * @returns {boolean} `true` when the two code points form a valid escape.
 */
function isValidEscape(first, second) {
    return (
        first === 0x005C &&     // must start with a reverse solidus ...
        !isNewline(second) &&   // ... that is not followed by a newline ...
        second !== EOF          // ... nor by the end of input
    );
}
/**
 * § 4.3.9. Check if three code points would start an identifier
 *
 * @param {number} first  First code point.
 * @param {number} second Second code point.
 * @param {number} third  Third code point (only consulted after a leading hyphen-minus).
 * @returns {boolean} `true` when the code points would start an identifier.
 */
function isIdentifierStart(first, second, third) {
    switch (first) {
        // U+002D HYPHEN-MINUS
        case 0x002D:
            // The second code point must be a name-start code point or another
            // U+002D HYPHEN-MINUS, or the second and third code points must be
            // a valid escape.
            if (second === 0x002D) {
                return true;
            }
            return isNameStart(second) || isValidEscape(second, third);

        // U+005C REVERSE SOLIDUS (\)
        case 0x005C:
            // Depends on whether the first and second code points are a valid escape.
            return isValidEscape(first, second);

        // A name-start code point starts an identifier; anything else does not.
        default:
            return isNameStart(first);
    }
}
/**
 * § 4.3.10. Check if three code points would start a number
 *
 * Where the spec algorithm answers true/false, this function returns a number:
 * 0 means "does not start a number"; a positive result is the count of leading
 * code points that were matched (optional sign, optional full stop, first digit).
 *
 * @param {number} first  First code point.
 * @param {number} second Second code point.
 * @param {number} third  Third code point (only consulted for a sign followed by a full stop).
 * @returns {number} 0 when the code points do not start a number, otherwise 1, 2 or 3.
 */
function isNumberStart(first, second, third) {
    switch (first) {
        // U+002B PLUS SIGN (+)
        // U+002D HYPHEN-MINUS (-)
        case 0x002B:
        case 0x002D:
            // sign + digit
            if (isDigit(second)) {
                return 2;
            }
            // sign + U+002E FULL STOP (.) + digit
            if (second === 0x002E && isDigit(third)) {
                return 3;
            }
            return 0;

        // U+002E FULL STOP (.)
        case 0x002E:
            // full stop + digit
            if (isDigit(second)) {
                return 2;
            }
            return 0;

        // A digit starts a number on its own; anything else does not.
        default:
            return isDigit(first) ? 1 : 0;
    }
}
- //
- // Misc
- //
/**
 * Detect a BOM (https://en.wikipedia.org/wiki/Byte_order_mark).
 *
 * Two values are recognised: 0xFEFF (UTF-16BE) and 0xFFFE (UTF-16LE).
 *
 * @param {number} code Code to test.
 * @returns {number} 1 when `code` is one of the two BOM values, otherwise 0.
 */
function isBOM(code) {
    var isMark =
        code === 0xFEFF ||  // UTF-16BE
        code === 0xFFFE;    // UTF-16LE

    return isMark ? 1 : 0;
}
- // Fast code category
- //
- // https://drafts.csswg.org/css-syntax/#tokenizer-definitions
- // > non-ASCII code point
- // > A code point with a value equal to or greater than U+0080 <control>
- // > name-start code point
- // > A letter, a non-ASCII code point, or U+005F LOW LINE (_).
- // > name code point
- // > A name-start code point, a digit, or U+002D HYPHEN-MINUS (-)
- // That means only ASCII code points have a special meaning, so we define a map for codes 0..127 only
/**
 * Look up the category of a code point.
 *
 * Codes below 0x80 are resolved through the precomputed CATEGORY table; every
 * other code is reported as a name-start code point.
 *
 * @param {number} code Code point to classify.
 * @returns {number} One of the charCodeCategory.* constants, or the code
 *     itself for ASCII code points that fall into none of those categories.
 */
function charCodeCategory(code) {
    if (code < 0x80) {
        return CATEGORY[code];
    }

    return charCodeCategory.NameStart;
}

// Category identifiers. All of them are >= 0x80, so they cannot collide with
// the raw ASCII codes (< 0x80) that the table stores for uncategorised code points.
charCodeCategory.Eof = 0x80;
charCodeCategory.WhiteSpace = 0x82;
charCodeCategory.Digit = 0x83;
charCodeCategory.NameStart = 0x84;
charCodeCategory.NonPrintable = 0x85;

// Lookup table with one slot per ASCII code point (0..127).
var CATEGORY = new Array(0x80);

// Fill the table once at load time. The checks run in a fixed order and the
// first one that matches decides the category.
for (var ascii = 0; ascii < CATEGORY.length; ascii++) {
    if (isWhiteSpace(ascii)) {
        CATEGORY[ascii] = charCodeCategory.WhiteSpace;
    } else if (isDigit(ascii)) {
        CATEGORY[ascii] = charCodeCategory.Digit;
    } else if (isNameStart(ascii)) {
        CATEGORY[ascii] = charCodeCategory.NameStart;
    } else if (isNonPrintable(ascii)) {
        CATEGORY[ascii] = charCodeCategory.NonPrintable;
    } else {
        // Uncategorised code points map to themselves; a zero code would map to Eof.
        // NOTE(review): code 0 is already matched by the isNonPrintable() branch
        // above, so the Eof fallback looks unreachable here - confirm the intent.
        CATEGORY[ascii] = ascii || charCodeCategory.Eof;
    }
}
// Public API of the module. The export object is assembled property by
// property, in the same key order as the definitions appear above.
var definitions = {};

// Code point classes
definitions.isDigit = isDigit;
definitions.isHexDigit = isHexDigit;
definitions.isUppercaseLetter = isUppercaseLetter;
definitions.isLowercaseLetter = isLowercaseLetter;
definitions.isLetter = isLetter;
definitions.isNonAscii = isNonAscii;
definitions.isNameStart = isNameStart;
definitions.isName = isName;
definitions.isNonPrintable = isNonPrintable;
definitions.isNewline = isNewline;
definitions.isWhiteSpace = isWhiteSpace;

// Multi-code-point checks
definitions.isValidEscape = isValidEscape;
definitions.isIdentifierStart = isIdentifierStart;
definitions.isNumberStart = isNumberStart;

// Misc
definitions.isBOM = isBOM;
definitions.charCodeCategory = charCodeCategory;

module.exports = definitions;
|