// TokenStream.js (5.8 KB)

  var constants = require('../tokenizer/const');
  var TYPE = constants.TYPE;
  var NAME = constants.NAME;

  var utils = require('../tokenizer/utils');
  var cmpStr = utils.cmpStr;

  // Local aliases for the token type ids this module compares against.
  var EOF = TYPE.EOF;
  var WHITESPACE = TYPE.WhiteSpace;
  var COMMENT = TYPE.Comment;

  // Layout of one offsetAndType entry: the low 24 bits hold the token's end
  // offset in the source, the bits above them hold the token's type id.
  var OFFSET_MASK = 0x00FFFFFF;
  var TYPE_SHIFT = 24;
  // Cursor over a tokenized source string.
  //
  // The stream reads (but never assigns) the following fields, which must be
  // provided by whoever fills it in (presumably the tokenizer - confirm):
  //   source          - the tokenized string
  //   firstCharOffset - offset in `source` where the first token starts
  //   tokenCount      - number of tokens
  //   offsetAndType   - per token: (type << TYPE_SHIFT) | endOffset
  //   balance         - per token: index of a related token; for a balanced
  //                     pair, balance[balance[i]] === i
  var TokenStream = function() {
      this.offsetAndType = null;
      this.balance = null;

      this.reset();
  };

  // Start offset of the token at `index`: the end offset stored for the
  // previous token, or `firstCharOffset` for the first token, which has no
  // previous entry to read from (reading index -1 would silently yield 0).
  function tokenStartOffset(stream, index) {
      return index > 0
          ? stream.offsetAndType[index - 1] & OFFSET_MASK
          : stream.firstCharOffset;
  }

  TokenStream.prototype = {
      // Rewinds the cursor to the position before the first token.
      reset: function() {
          this.eof = false;
          this.tokenIndex = -1;
          this.tokenType = 0;
          this.tokenStart = this.firstCharOffset;
          this.tokenEnd = this.firstCharOffset;
      },

      // Returns the type of the token `offset` tokens away from the current
      // one, or EOF when that position is at or past the end of the stream.
      lookupType: function(offset) {
          offset += this.tokenIndex;

          if (offset < this.tokenCount) {
              // unsigned shift: keeps type ids >= 128 positive
              return this.offsetAndType[offset] >>> TYPE_SHIFT;
          }

          return EOF;
      },

      // Returns the source offset at which the token `offset` tokens away
      // from the current one starts, or source.length past the end.
      lookupOffset: function(offset) {
          offset += this.tokenIndex;

          if (offset < this.tokenCount) {
              return tokenStartOffset(this, offset);
          }

          return this.source.length;
      },

      // Compares the text of the token `offset` tokens away from the current
      // one with `referenceStr` using cmpStr(); false past the end.
      lookupValue: function(offset, referenceStr) {
          offset += this.tokenIndex;

          if (offset < this.tokenCount) {
              return cmpStr(
                  this.source,
                  tokenStartOffset(this, offset),
                  this.offsetAndType[offset] & OFFSET_MASK,
                  referenceStr
              );
          }

          return false;
      },

      // Returns the start offset of the token with absolute index `tokenIndex`.
      getTokenStart: function(tokenIndex) {
          if (tokenIndex === this.tokenIndex) {
              return this.tokenStart;
          }

          if (tokenIndex > 0) {
              // NOTE(review): the out-of-range branch reads the entry at index
              // tokenCount - assumes the producer stores a trailing entry there.
              return tokenIndex < this.tokenCount
                  ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                  : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
          }

          return this.firstCharOffset;
      },

      // TODO: -> skipUntilBalanced
      // Scans tokens from `startToken`, calling mode(type, source, offset) for
      // each visited token, where `offset` is that token's start offset.
      //   mode() === 1 - stop before the token
      //   mode() === 2 - stop after the token
      //   otherwise    - continue; an opening token of a balanced pair jumps
      //                  straight to its closing token
      // Returns the stop position relative to the current token index.
      getRawLength: function(startToken, mode) {
          var cursor = startToken;
          var balanceEnd;
          var offset;
          var type;

          loop:
          for (; cursor < this.tokenCount; cursor++) {
              balanceEnd = this.balance[cursor];

              // stop scanning on balance edge that points to offset before start token
              if (balanceEnd < startToken) {
                  break loop;
              }

              type = this.offsetAndType[cursor] >>> TYPE_SHIFT;

              // recomputed on every iteration so it stays correct for the first
              // token and for the token following a fast-forwarded block
              offset = tokenStartOffset(this, cursor);

              // check token is stop type
              switch (mode(type, this.source, offset)) {
                  case 1:
                      break loop;

                  case 2:
                      cursor++;
                      break loop;

                  default:
                      // fast forward to the end of balanced block
                      if (this.balance[balanceEnd] === cursor) {
                          cursor = balanceEnd;
                      }
              }
          }

          return cursor - this.tokenIndex;
      },

      // True when the balance entry of the current token is less than `pos`.
      isBalanceEdge: function(pos) {
          return this.balance[this.tokenIndex] < pos;
      },

      // True when the token `offset` tokens ahead (or the current token when
      // `offset` is omitted or 0) is a Delim starting with char code `code`.
      isDelim: function(code, offset) {
          if (offset) {
              return (
                  this.lookupType(offset) === TYPE.Delim &&
                  this.source.charCodeAt(this.lookupOffset(offset)) === code
              );
          }

          return (
              this.tokenType === TYPE.Delim &&
              this.source.charCodeAt(this.tokenStart) === code
          );
      },

      // Text of the current token.
      getTokenValue: function() {
          return this.source.substring(this.tokenStart, this.tokenEnd);
      },

      // Length of the current token in source characters.
      getTokenLength: function() {
          return this.tokenEnd - this.tokenStart;
      },

      // Source text from `start` up to the start of the current token.
      substrToCursor: function(start) {
          return this.source.substring(start, this.tokenStart);
      },

      // Advances past a run of whitespace tokens starting at the current one.
      skipWS: function() {
          for (var i = this.tokenIndex, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
              if ((this.offsetAndType[i] >>> TYPE_SHIFT) !== WHITESPACE) {
                  break;
              }
          }

          if (skipTokenCount > 0) {
              this.skip(skipTokenCount);
          }
      },

      // Advances while the current token is whitespace or a comment.
      skipSC: function() {
          while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
              this.next();
          }
      },

      // Moves the cursor `tokenCount` tokens forward; switches to the EOF
      // state when that lands at or past the end of the stream.
      skip: function(tokenCount) {
          var next = this.tokenIndex + tokenCount;

          if (next < this.tokenCount) {
              this.tokenIndex = next;
              this.tokenStart = tokenStartOffset(this, next);
              next = this.offsetAndType[next];
              this.tokenType = next >>> TYPE_SHIFT;
              this.tokenEnd = next & OFFSET_MASK;
          } else {
              this.tokenIndex = this.tokenCount;
              this.next();
          }
      },

      // Moves the cursor to the next token, or to the EOF state
      // (eof = true, type EOF, start = end = source.length) when none is left.
      next: function() {
          var next = this.tokenIndex + 1;

          if (next < this.tokenCount) {
              this.tokenIndex = next;
              this.tokenStart = this.tokenEnd;
              next = this.offsetAndType[next];
              this.tokenType = next >>> TYPE_SHIFT;
              this.tokenEnd = next & OFFSET_MASK;
          } else {
              this.tokenIndex = this.tokenCount;
              this.eof = true;
              this.tokenType = EOF;
              this.tokenStart = this.tokenEnd = this.source.length;
          }
      },

      // Returns one { idx, type, chunk, balance } record per token, where
      // `type` is the name from NAME and `chunk` is the token's source text.
      dump: function() {
          var offset = this.firstCharOffset;

          return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
              var start = offset;
              var end = item & OFFSET_MASK;

              offset = end;

              return {
                  idx: idx,
                  type: NAME[item >>> TYPE_SHIFT],
                  chunk: this.source.substring(start, end),
                  balance: this.balance[idx]
              };
          }, this);
      }
  };
  module.exports = TokenStream;