// char-code-definitions.js (7.0 KB)
  1. var EOF = 0;
  2. // https://drafts.csswg.org/css-syntax-3/
  3. // § 4.2. Definitions
  4. // digit
  5. // A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9).
  6. function isDigit(code) {
  7. return code >= 0x0030 && code <= 0x0039;
  8. }
  9. // hex digit
  10. // A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F),
  11. // or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f).
  12. function isHexDigit(code) {
  13. return (
  14. isDigit(code) || // 0 .. 9
  15. (code >= 0x0041 && code <= 0x0046) || // A .. F
  16. (code >= 0x0061 && code <= 0x0066) // a .. f
  17. );
  18. }
  19. // uppercase letter
  20. // A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z).
  21. function isUppercaseLetter(code) {
  22. return code >= 0x0041 && code <= 0x005A;
  23. }
  24. // lowercase letter
  25. // A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z).
  26. function isLowercaseLetter(code) {
  27. return code >= 0x0061 && code <= 0x007A;
  28. }
  29. // letter
  30. // An uppercase letter or a lowercase letter.
  31. function isLetter(code) {
  32. return isUppercaseLetter(code) || isLowercaseLetter(code);
  33. }
  34. // non-ASCII code point
  35. // A code point with a value equal to or greater than U+0080 <control>.
  36. function isNonAscii(code) {
  37. return code >= 0x0080;
  38. }
  39. // name-start code point
  40. // A letter, a non-ASCII code point, or U+005F LOW LINE (_).
  41. function isNameStart(code) {
  42. return isLetter(code) || isNonAscii(code) || code === 0x005F;
  43. }
  44. // name code point
  45. // A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
  46. function isName(code) {
  47. return isNameStart(code) || isDigit(code) || code === 0x002D;
  48. }
  49. // non-printable code point
  50. // A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION,
  51. // or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE.
  52. function isNonPrintable(code) {
  53. return (
  54. (code >= 0x0000 && code <= 0x0008) ||
  55. (code === 0x000B) ||
  56. (code >= 0x000E && code <= 0x001F) ||
  57. (code === 0x007F)
  58. );
  59. }
  60. // newline
  61. // U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition,
  62. // as they are converted to U+000A LINE FEED during preprocessing.
  63. // TODO: we doesn't do a preprocessing, so check a code point for U+000D CARRIAGE RETURN and U+000C FORM FEED
  64. function isNewline(code) {
  65. return code === 0x000A || code === 0x000D || code === 0x000C;
  66. }
  67. // whitespace
  68. // A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
  69. function isWhiteSpace(code) {
  70. return isNewline(code) || code === 0x0020 || code === 0x0009;
  71. }
  72. // § 4.3.8. Check if two code points are a valid escape
  73. function isValidEscape(first, second) {
  74. // If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
  75. if (first !== 0x005C) {
  76. return false;
  77. }
  78. // Otherwise, if the second code point is a newline or EOF, return false.
  79. if (isNewline(second) || second === EOF) {
  80. return false;
  81. }
  82. // Otherwise, return true.
  83. return true;
  84. }
  85. // § 4.3.9. Check if three code points would start an identifier
  86. function isIdentifierStart(first, second, third) {
  87. // Look at the first code point:
  88. // U+002D HYPHEN-MINUS
  89. if (first === 0x002D) {
  90. // If the second code point is a name-start code point or a U+002D HYPHEN-MINUS,
  91. // or the second and third code points are a valid escape, return true. Otherwise, return false.
  92. return (
  93. isNameStart(second) ||
  94. second === 0x002D ||
  95. isValidEscape(second, third)
  96. );
  97. }
  98. // name-start code point
  99. if (isNameStart(first)) {
  100. // Return true.
  101. return true;
  102. }
  103. // U+005C REVERSE SOLIDUS (\)
  104. if (first === 0x005C) {
  105. // If the first and second code points are a valid escape, return true. Otherwise, return false.
  106. return isValidEscape(first, second);
  107. }
  108. // anything else
  109. // Return false.
  110. return false;
  111. }
  112. // § 4.3.10. Check if three code points would start a number
  113. function isNumberStart(first, second, third) {
  114. // Look at the first code point:
  115. // U+002B PLUS SIGN (+)
  116. // U+002D HYPHEN-MINUS (-)
  117. if (first === 0x002B || first === 0x002D) {
  118. // If the second code point is a digit, return true.
  119. if (isDigit(second)) {
  120. return 2;
  121. }
  122. // Otherwise, if the second code point is a U+002E FULL STOP (.)
  123. // and the third code point is a digit, return true.
  124. // Otherwise, return false.
  125. return second === 0x002E && isDigit(third) ? 3 : 0;
  126. }
  127. // U+002E FULL STOP (.)
  128. if (first === 0x002E) {
  129. // If the second code point is a digit, return true. Otherwise, return false.
  130. return isDigit(second) ? 2 : 0;
  131. }
  132. // digit
  133. if (isDigit(first)) {
  134. // Return true.
  135. return 1;
  136. }
  137. // anything else
  138. // Return false.
  139. return 0;
  140. }
  141. //
  142. // Misc
  143. //
  144. // detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
  145. function isBOM(code) {
  146. // UTF-16BE
  147. if (code === 0xFEFF) {
  148. return 1;
  149. }
  150. // UTF-16LE
  151. if (code === 0xFFFE) {
  152. return 1;
  153. }
  154. return 0;
  155. }
  156. // Fast code category
  157. //
  158. // https://drafts.csswg.org/css-syntax/#tokenizer-definitions
  159. // > non-ASCII code point
  160. // > A code point with a value equal to or greater than U+0080 <control>
  161. // > name-start code point
  162. // > A letter, a non-ASCII code point, or U+005F LOW LINE (_).
  163. // > name code point
  164. // > A name-start code point, a digit, or U+002D HYPHEN-MINUS (-)
  165. // That means only ASCII code points has a special meaning and we define a maps for 0..127 codes only
  166. var CATEGORY = new Array(0x80);
  167. charCodeCategory.Eof = 0x80;
  168. charCodeCategory.WhiteSpace = 0x82;
  169. charCodeCategory.Digit = 0x83;
  170. charCodeCategory.NameStart = 0x84;
  171. charCodeCategory.NonPrintable = 0x85;
  172. for (var i = 0; i < CATEGORY.length; i++) {
  173. switch (true) {
  174. case isWhiteSpace(i):
  175. CATEGORY[i] = charCodeCategory.WhiteSpace;
  176. break;
  177. case isDigit(i):
  178. CATEGORY[i] = charCodeCategory.Digit;
  179. break;
  180. case isNameStart(i):
  181. CATEGORY[i] = charCodeCategory.NameStart;
  182. break;
  183. case isNonPrintable(i):
  184. CATEGORY[i] = charCodeCategory.NonPrintable;
  185. break;
  186. default:
  187. CATEGORY[i] = i || charCodeCategory.Eof;
  188. }
  189. }
  190. function charCodeCategory(code) {
  191. return code < 0x80 ? CATEGORY[code] : charCodeCategory.NameStart;
  192. };
  193. module.exports = {
  194. isDigit: isDigit,
  195. isHexDigit: isHexDigit,
  196. isUppercaseLetter: isUppercaseLetter,
  197. isLowercaseLetter: isLowercaseLetter,
  198. isLetter: isLetter,
  199. isNonAscii: isNonAscii,
  200. isNameStart: isNameStart,
  201. isName: isName,
  202. isNonPrintable: isNonPrintable,
  203. isNewline: isNewline,
  204. isWhiteSpace: isWhiteSpace,
  205. isValidEscape: isValidEscape,
  206. isIdentifierStart: isIdentifierStart,
  207. isNumberStart: isNumberStart,
  208. isBOM: isBOM,
  209. charCodeCategory: charCodeCategory
  210. };