index.js 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. "use strict";
  2. const punycode = require("punycode");
  3. const regexes = require("./lib/regexes.js");
  4. const mappingTable = require("./lib/mappingTable.json");
  /**
   * Reports whether a string holds at least one UTF-16 code unit outside the
   * 7-bit ASCII range (U+0000 to U+007F).
   *
   * @param {string} str - Text to inspect.
   * @returns {boolean} `true` when some code unit is above U+007F.
   */
  function containsNonASCII(str) {
    // Without the `u` flag the class works on code units, so this range is
    // exactly "everything that is not \x00-\x7F".
    const beyondASCII = /[\u0080-\uFFFF]/;
    return beyondASCII.test(str);
  }
  /**
   * Binary-searches `mappingTable` for the row whose inclusive code point
   * range contains `val` and returns that row without its range.
   *
   * Each row has the shape `[[rangeStart, rangeEnd], status, ...extra]`.
   * A status that starts with "disallowed_STD3_" is resolved here: it becomes
   * "disallowed" when `useSTD3ASCIIRules` is truthy, otherwise the prefix is
   * stripped and the remainder is used as the status.
   *
   * @param {number} val - Code point to look up.
   * @param {{useSTD3ASCIIRules: boolean}} options - STD3 handling switch.
   * @returns {Array|null} `[status, ...extra]`, or `null` when no row covers `val`.
   */
  function findStatus(val, { useSTD3ASCIIRules }) {
    const std3Prefix = "disallowed_STD3_";
    let lo = 0;
    let hi = mappingTable.length - 1;

    while (lo <= hi) {
      const pivot = lo + Math.floor((hi - lo) / 2);
      const [[rangeStart, rangeEnd], status, ...extra] = mappingTable[pivot];
      const covered = rangeStart <= val && rangeEnd >= val;

      if (!covered) {
        // Narrow the search window towards the side that can still hold `val`.
        if (rangeStart > val) {
          hi = pivot - 1;
        } else {
          lo = pivot + 1;
        }
        continue;
      }

      if (!status.startsWith(std3Prefix)) {
        return [status, ...extra];
      }
      const resolved = useSTD3ASCIIRules ? "disallowed" : status.slice(std3Prefix.length);
      return [resolved, ...extra];
    }

    return null;
  }
  /**
   * Applies the per-character mapping step to a domain name.
   *
   * Every code point is looked up with `findStatus` and handled by status:
   *  - "disallowed": kept as-is and the result is flagged as an error;
   *  - "ignored": dropped;
   *  - "mapped": replaced by its mapping;
   *  - "deviation": replaced by its mapping only for transitional processing,
   *    otherwise kept;
   *  - "valid": kept.
   * Any other status contributes nothing to the output.
   *
   * @param {string} domainName - Input to map, iterated by code point.
   * @param {{useSTD3ASCIIRules: boolean, processingOption: string}} options
   * @returns {{string: string, error: boolean}} Mapped text and error flag.
   */
  function mapChars(domainName, { useSTD3ASCIIRules, processingOption }) {
    const mapDeviations = processingOption === "transitional";
    let output = "";
    let sawDisallowed = false;

    for (const symbol of domainName) {
      const [status, replacement] = findStatus(symbol.codePointAt(0), { useSTD3ASCIIRules });

      if (status === "ignored") {
        continue;
      }

      if (status === "mapped" || (status === "deviation" && mapDeviations)) {
        output += replacement;
      } else if (status === "valid" || status === "deviation" || status === "disallowed") {
        output += symbol;
        if (status === "disallowed") {
          sawDisallowed = true;
        }
      }
    }

    return { string: output, error: sawDisallowed };
  }
  /**
   * Checks one label against the validity criteria used by `processing`.
   *
   * The checks run in this order and the first failure returns `false`:
   *  1. the label must already be in NFC;
   *  2. with `checkHyphens`: no "-" in both the third and fourth positions,
   *     and no leading or trailing "-";
   *  3. the label must not contain "." and must not start with a combining mark;
   *  4. every code point's status (from `findStatus`) must be "valid", or
   *     also "deviation" when `processingOption` is "nontransitional";
   *  5. with `checkJoiners`: every ZWNJ (U+200C) / ZWJ (U+200D) must satisfy
   *     the contextual rules of RFC 5892 appendix A;
   *  6. with `checkBidi`: a non-empty label must satisfy the Bidi rule of
   *     RFC 5893 section 2.
   *
   * @param {string} label - Label to validate (already mapped/decoded).
   * @param {object} options
   * @param {boolean} options.checkHyphens - Enable the hyphen checks.
   * @param {boolean} options.checkBidi - Enable the Bidi rule.
   * @param {boolean} options.checkJoiners - Enable the joiner context rules.
   * @param {string} options.processingOption - "transitional" or "nontransitional".
   * @param {boolean} options.useSTD3ASCIIRules - Forwarded to `findStatus`.
   * @returns {boolean} `true` when the label passes every enabled check.
   */
  function validateLabel(label, { checkHyphens, checkBidi, checkJoiners, processingOption, useSTD3ASCIIRules }) {
    if (label.normalize("NFC") !== label) {
      return false;
    }

    // Work on code points so astral characters count as one position.
    const codePoints = Array.from(label);

    if (checkHyphens) {
      if ((codePoints[2] === "-" && codePoints[3] === "-") ||
          (label.startsWith("-") || label.endsWith("-"))) {
        return false;
      }
    }

    if (label.includes(".") ||
        (codePoints.length > 0 && regexes.combiningMarks.test(codePoints[0]))) {
      return false;
    }

    for (const ch of codePoints) {
      const [status] = findStatus(ch.codePointAt(0), { useSTD3ASCIIRules });
      if ((processingOption === "transitional" && status !== "valid") ||
          (processingOption === "nontransitional" &&
           status !== "valid" && status !== "deviation")) {
        return false;
      }
    }

    // https://tools.ietf.org/html/rfc5892#appendix-A
    if (checkJoiners) {
      // Start of the window handed to the ZWNJ context regex; it moves past
      // each ZWNJ that has been accepted by that regex.
      let last = 0;
      for (const [i, ch] of codePoints.entries()) {
        if (ch === "\u200C" || ch === "\u200D") {
          if (i > 0) {
            // Either joiner is fine directly after a virama.
            if (regexes.combiningClassVirama.test(codePoints[i - 1])) {
              continue;
            }
            if (ch === "\u200C") {
              // TODO: make this more efficient
              // Test the stretch from `last` up to (not including) the next ZWNJ.
              const next = codePoints.indexOf("\u200C", i + 1);
              const test = next < 0 ? codePoints.slice(last) : codePoints.slice(last, next);
              if (regexes.validZWNJ.test(test.join(""))) {
                last = i + 1;
                continue;
              }
            }
          }
          // A joiner at position 0, or one with no accepted context.
          return false;
        }
      }
    }

    // https://tools.ietf.org/html/rfc5893#section-2
    // An empty label (e.g. the one produced by a trailing dot) has no first
    // character: `codePoints[0]` would be `undefined`, and RegExp#test would
    // then match against the text "undefined". Skip the Bidi rule for it.
    if (checkBidi && codePoints.length > 0) {
      let rtl;

      // 1
      if (regexes.bidiS1LTR.test(codePoints[0])) {
        rtl = false;
      } else if (regexes.bidiS1RTL.test(codePoints[0])) {
        rtl = true;
      } else {
        return false;
      }

      if (rtl) {
        // 2-4
        if (!regexes.bidiS2.test(label) ||
            !regexes.bidiS3.test(label) ||
            (regexes.bidiS4EN.test(label) && regexes.bidiS4AN.test(label))) {
          return false;
        }
      } else if (!regexes.bidiS5.test(label) ||
                 !regexes.bidiS6.test(label)) { // 5-6
        return false;
      }
    }

    return true;
  }
  /**
   * Tests whether a domain, given as its labels, matches `regexes.bidiDomain`.
   *
   * Labels starting with "xn--" are Punycode-decoded first; a label that
   * fails to decode is treated as an empty string. The resulting labels are
   * joined with "." before the regex is applied.
   *
   * @param {string[]} labels - Labels of the domain.
   * @returns {boolean} Result of the `bidiDomain` regex test.
   */
  function isBidiDomain(labels) {
    const decodeLabel = label => {
      if (!label.startsWith("xn--")) {
        return label;
      }
      try {
        return punycode.decode(label.slice(4));
      } catch (err) {
        // Undecodable labels contribute nothing to the bidi decision.
        return "";
      }
    };

    const joined = labels.map(decodeLabel).join(".");
    return regexes.bidiDomain.test(joined);
  }
  /**
   * Runs the map / normalize / break / convert-validate pipeline on a domain.
   *
   * Labels starting with "xn--" are Punycode-decoded in place and are always
   * validated as "nontransitional"; a decode failure sets the error flag.
   * Once the error flag is set, remaining labels are still decoded but no
   * longer validated. The Bidi check is only requested for a label when
   * `options.checkBidi` is set and `isBidiDomain` reports a bidi domain.
   *
   * @param {string} domainName - Domain name to process.
   * @param {object} options - Processing flags, forwarded to `mapChars` and
   *   `validateLabel` (`processingOption`, `checkHyphens`, `checkBidi`,
   *   `checkJoiners`, `useSTD3ASCIIRules`).
   * @returns {{string: string, error: boolean}} Processed labels joined with
   *   "." and the accumulated error flag.
   */
  function processing(domainName, options) {
    const { processingOption } = options;

    // 1. Map.
    const mapped = mapChars(domainName, options);
    let error = mapped.error;

    // 2. Normalize, then 3. Break.
    const labels = mapped.string.normalize("NFC").split(".");
    const isBidi = isBidiDomain(labels);

    // 4. Convert/Validate.
    for (let i = 0; i < labels.length; i += 1) {
      const isPunycode = labels[i].startsWith("xn--");

      if (isPunycode) {
        try {
          labels[i] = punycode.decode(labels[i].substring(4));
        } catch (err) {
          error = true;
          continue;
        }
      }

      // No need to validate if we already know there is an error.
      if (error) {
        continue;
      }

      const labelOptions = Object.assign({}, options, {
        processingOption: isPunycode ? "nontransitional" : processingOption,
        checkBidi: options.checkBidi && isBidi
      });
      if (!validateLabel(labels[i], labelOptions)) {
        error = true;
      }
    }

    return {
      string: labels.join("."),
      error
    };
  }
  /**
   * Converts a domain name to its ASCII form.
   *
   * The name is run through `processing`; every resulting label that contains
   * a non-ASCII character is Punycode-encoded and prefixed with "xn--". With
   * `verifyDNSLength`, the joined result must be 1 to 253 characters long and
   * every label 1 to 63 characters long.
   *
   * @param {string} domainName - Domain name to convert.
   * @param {object} [options]
   * @param {boolean} [options.checkHyphens=false]
   * @param {boolean} [options.checkBidi=false]
   * @param {boolean} [options.checkJoiners=false]
   * @param {boolean} [options.useSTD3ASCIIRules=false]
   * @param {string} [options.processingOption="nontransitional"] - Either
   *   "transitional" or "nontransitional".
   * @param {boolean} [options.verifyDNSLength=false]
   * @returns {string|null} The ASCII domain, or `null` when any step failed.
   * @throws {RangeError} When `processingOption` is not one of the two
   *   accepted values.
   */
  function toASCII(domainName, {
    checkHyphens = false,
    checkBidi = false,
    checkJoiners = false,
    useSTD3ASCIIRules = false,
    processingOption = "nontransitional",
    verifyDNSLength = false
  } = {}) {
    const knownOptions = ["transitional", "nontransitional"];
    if (!knownOptions.includes(processingOption)) {
      throw new RangeError("processingOption must be either transitional or nontransitional");
    }

    const result = processing(domainName, {
      processingOption,
      checkHyphens,
      checkBidi,
      checkJoiners,
      useSTD3ASCIIRules
    });

    const labels = [];
    for (const label of result.string.split(".")) {
      let asciiLabel = label;
      if (containsNonASCII(label)) {
        try {
          asciiLabel = `xn--${punycode.encode(label)}`;
        } catch (e) {
          // Keep the label unchanged; the error flag makes the call return null.
          result.error = true;
        }
      }
      labels.push(asciiLabel);
    }

    if (verifyDNSLength) {
      const total = labels.join(".").length;
      const badTotal = total === 0 || total > 253;
      const badLabel = labels.some(label => label.length === 0 || label.length > 63);
      if (badTotal || badLabel) {
        result.error = true;
      }
    }

    return result.error ? null : labels.join(".");
  }
  /**
   * Converts a domain name to its Unicode form.
   *
   * Always uses "nontransitional" processing. Unlike `toASCII`, a failure
   * does not suppress the output: the processed domain is returned together
   * with the error flag.
   *
   * @param {string} domainName - Domain name to convert.
   * @param {object} [options]
   * @param {boolean} [options.checkHyphens=false]
   * @param {boolean} [options.checkBidi=false]
   * @param {boolean} [options.checkJoiners=false]
   * @param {boolean} [options.useSTD3ASCIIRules=false]
   * @returns {{domain: string, error: boolean}} Processed domain and error flag.
   */
  function toUnicode(domainName, {
    checkHyphens = false,
    checkBidi = false,
    checkJoiners = false,
    useSTD3ASCIIRules = false
  } = {}) {
    const settings = {
      processingOption: "nontransitional",
      checkHyphens,
      checkBidi,
      checkJoiners,
      useSTD3ASCIIRules
    };
    const { string: domain, error } = processing(domainName, settings);
    return { domain, error };
  }
  248. module.exports = {
  249. toASCII,
  250. toUnicode
  251. };