papaparse.js 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878
  1. /* @license
  2. Papa Parse
  3. v5.3.2
  4. https://github.com/mholt/PapaParse
  5. License: MIT
  6. */
  7. (function(root, factory)
  8. {
  9. /* globals define */
  10. if (typeof define === 'function' && define.amd)
  11. {
  12. // AMD. Register as an anonymous module.
  13. define([], factory);
  14. }
  15. else if (typeof module === 'object' && typeof exports !== 'undefined')
  16. {
  17. // Node. Does not work with strict CommonJS, but
  18. // only CommonJS-like environments that support module.exports,
  19. // like Node.
  20. module.exports = factory();
  21. }
  22. else
  23. {
  24. // Browser globals (root is window)
  25. root.Papa = factory();
  26. }
  27. // in strict mode we cannot access arguments.callee, so we need a named reference to
  28. // stringify the factory method for the blob worker
  29. // eslint-disable-next-line func-name
  30. }(this, function moduleFactory()
  31. {
  32. 'use strict';
  /**
   * Best-effort reference to the host's global object, obtained without
   * `Function('return this')()` / `eval`, which are disabled under a
   * Content Security Policy.
   *
   * Resolves to `self` when it exists, otherwise `window`, otherwise a fresh
   * empty object.
   */
  var global = (function() {
    if (typeof self !== 'undefined') { return self; }
    if (typeof window !== 'undefined') { return window; }
    // A `typeof global` probe cannot work here: inside this function the
    // identifier `global` resolves to the enclosing `var global` being
    // initialised, which is still undefined (hoisting), so such a branch is
    // unreachable. When neither `self` nor `window` exists (e.g. when running
    // tests) we fall back to an empty object.
    return {};
  })();
  /**
   * Returns the object URL of a Blob containing this module's own source
   * (the stringified factory wrapped in an immediately-invoked call).
   * The URL is created once and cached on `Papa.BLOB_URL`.
   */
  function getWorkerBlob() {
    if (Papa.BLOB_URL) {
      return Papa.BLOB_URL;
    }
    var urlApi = global.URL || global.webkitURL || null;
    var factorySource = moduleFactory.toString();
    var workerBlob = new Blob(['(', factorySource, ')();'], {type: 'text/javascript'});
    Papa.BLOB_URL = urlApi.createObjectURL(workerBlob);
    return Papa.BLOB_URL;
  }
  // IS_WORKER: the global has postMessage but no document.
  var IS_WORKER = !global.document && !!global.postMessage;
  // IS_PAPA_WORKER: a worker whose location protocol matches "blob:".
  var IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol);

  var workers = {};
  var workerIdCounter = 0;

  // The public API object returned by the factory.
  var Papa = {
    parse: CsvToJson,
    unparse: JsonToCsv,

    RECORD_SEP: String.fromCharCode(30),
    UNIT_SEP: String.fromCharCode(31),
    BYTE_ORDER_MARK: '\ufeff',
    BAD_DELIMITERS: ['\r', '\n', '"', '\ufeff'],
    WORKERS_SUPPORTED: !IS_WORKER && !!global.Worker,
    NODE_STREAM_INPUT: 1,

    // Configurable chunk sizes for local and remote files, respectively
    LocalChunkSize: 1024 * 1024 * 10, // 10 MB
    RemoteChunkSize: 1024 * 1024 * 5, // 5 MB
    DefaultDelimiter: ',', // Used if not specified and detection fails

    // Exposed for testing and development only
    Parser: Parser,
    ParserHandle: ParserHandle,
    NetworkStreamer: NetworkStreamer,
    FileStreamer: FileStreamer,
    StringStreamer: StringStreamer,
    ReadableStreamStreamer: ReadableStreamStreamer
  };

  // The duplex streamer is only exposed when PAPA_BROWSER_CONTEXT is not defined.
  if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
    Papa.DuplexStreamStreamer = DuplexStreamStreamer;
  }
  if (global.jQuery)
  {
    var $ = global.jQuery;

    /**
     * jQuery plugin: parses the files selected in the matched
     * `<input type="file">` elements, one file after another.
     *
     * @param {Object} [options]
     *   - `config`: base Papa.parse config; shallow-copied for each file.
     *   - `before(file, inputElem)`: called before each file. May return
     *     `{action: 'abort', reason}`, `{action: 'skip'}`, `{config: {...}}`
     *     (merged into that file's config) or the string `'skip'`.
     *   - `error(err, file, inputElem, reason)`: called when `before` aborts.
     *   - `complete()`: called once the queue is empty.
     * @returns {jQuery} the same jQuery object, to maintain chainability.
     */
    $.fn.parse = function(options)
    {
      // Tolerate a call with no arguments instead of failing on options.config.
      options = options || {};
      var config = options.config || {};
      var queue = [];

      this.each(function()
      {
        var $input = $(this);
        // .attr('type') is undefined for an <input> without a type attribute;
        // treat that as "not a file input" rather than calling toLowerCase on it.
        var typeAttr = $input.attr('type');
        var supported = $input.prop('tagName').toUpperCase() === 'INPUT'
          && typeof typeAttr === 'string'
          && typeAttr.toLowerCase() === 'file'
          && global.FileReader;

        if (!supported || !this.files || this.files.length === 0)
          return true; // continue to next input element

        for (var i = 0; i < this.files.length; i++)
        {
          queue.push({
            file: this.files[i],
            inputElem: this,
            instanceConfig: $.extend({}, config)
          });
        }
      });

      parseNextFile(); // begin parsing
      return this; // maintains chainability

      /** Parses the file at the head of the queue, or reports completion when the queue is empty. */
      function parseNextFile()
      {
        if (queue.length === 0)
        {
          if (isFunction(options.complete))
            options.complete();
          return;
        }

        var f = queue[0];

        if (isFunction(options.before))
        {
          var returned = options.before(f.file, f.inputElem);

          // typeof null === 'object', so exclude null explicitly.
          if (returned !== null && typeof returned === 'object')
          {
            if (returned.action === 'abort')
            {
              error('AbortError', f.file, f.inputElem, returned.reason);
              return; // Aborts all queued files immediately
            }
            else if (returned.action === 'skip')
            {
              fileComplete(); // parse the next file in the queue, if any
              return;
            }
            else if (typeof returned.config === 'object')
              f.instanceConfig = $.extend(f.instanceConfig, returned.config);
          }
          else if (returned === 'skip')
          {
            fileComplete(); // parse the next file in the queue, if any
            return;
          }
        }

        // Wrap up the user's complete callback, if any, so that ours also gets executed
        var userCompleteFunc = f.instanceConfig.complete;
        f.instanceConfig.complete = function(results)
        {
          if (isFunction(userCompleteFunc))
            userCompleteFunc(results, f.file, f.inputElem);
          fileComplete();
        };

        Papa.parse(f.file, f.instanceConfig);
      }

      /** Forwards an error to the user's `error` callback, if one was supplied. */
      function error(name, file, elem, reason)
      {
        if (isFunction(options.error))
          options.error({name: name}, file, elem, reason);
      }

      /** Drops the file at the head of the queue and moves on to the next one. */
      function fileComplete()
      {
        queue.splice(0, 1);
        parseNextFile();
      }
    };
  }
  // Inside a Papa-spawned worker, route incoming messages to the worker-side handler.
  if (IS_PAPA_WORKER) {
    global.onmessage = workerThreadReceivedMessage;
  }
  /**
   * Entry point behind `Papa.parse`: normalises the config, then hands the
   * input to a worker or to the streamer matching the input's type.
   *
   * Note that `_config` is modified in place (dynamicTyping, transform and,
   * in worker mode, the callback slots).
   *
   * @param {*} _input one of:
   *   - `Papa.NODE_STREAM_INPUT` (only when PAPA_BROWSER_CONTEXT is undefined),
   *   - a string (treated as a URL when `_config.download` is truthy),
   *   - an object with `readable === true` and `read`/`on` functions,
   *   - a File or any other object (handled by FileStreamer).
   * @param {Object} [_config]
   * @returns {*} undefined in worker mode; the duplex stream for
   *   NODE_STREAM_INPUT; otherwise whatever the streamer's `stream()` returns.
   * @throws {TypeError} if `_input` is of none of the supported kinds.
   */
  function CsvToJson(_input, _config)
  {
    _config = _config || {};
    var dynamicTyping = _config.dynamicTyping || false;
    if (isFunction(dynamicTyping)) {
      _config.dynamicTypingFunction = dynamicTyping;
      // Will be filled on first row call
      dynamicTyping = {};
    }
    _config.dynamicTyping = dynamicTyping;

    _config.transform = isFunction(_config.transform) ? _config.transform : false;

    if (_config.worker && Papa.WORKERS_SUPPORTED)
    {
      var w = newWorker();

      // Keep the real callbacks on this side; only booleans saying whether
      // each callback exists are posted to the worker.
      w.userStep = _config.step;
      w.userChunk = _config.chunk;
      w.userComplete = _config.complete;
      w.userError = _config.error;

      _config.step = isFunction(_config.step);
      _config.chunk = isFunction(_config.chunk);
      _config.complete = isFunction(_config.complete);
      _config.error = isFunction(_config.error);
      delete _config.worker; // prevent infinite loop

      w.postMessage({
        input: _input,
        config: _config,
        workerId: w.id
      });

      return;
    }

    var streamer = null;
    var hasInput = _input !== null && _input !== undefined;

    if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined')
    {
      // create a node Duplex stream for use
      // with .pipe
      streamer = new DuplexStreamStreamer(_config);
      return streamer.getStream();
    }
    else if (typeof _input === 'string')
    {
      if (_config.download)
        streamer = new NetworkStreamer(_config);
      else
        streamer = new StringStreamer(_config);
    }
    else if (hasInput && _input.readable === true && isFunction(_input.read) && isFunction(_input.on))
    {
      streamer = new ReadableStreamStreamer(_config);
    }
    else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. (see issue #106)
      streamer = new FileStreamer(_config);

    // null, undefined and non-string primitives match none of the branches
    // above; fail with a clear message instead of dereferencing null.
    if (streamer === null)
      throw new TypeError('Papa.parse: unsupported input; expected a string, File, readable stream or Papa.NODE_STREAM_INPUT');

    return streamer.stream(_input);
  }
  /**
   * Entry point behind `Papa.unparse`: serialises JSON-like data to a
   * delimited text (CSV) string.
   *
   * Accepted `_input` shapes (a string is first passed through JSON.parse):
   *   - an array of arrays: written as rows without a header row;
   *   - an array of objects: header comes from `config.columns`, else from the
   *     keys of the first object;
   *   - an object with `data` and optionally `fields` (or `meta.fields`).
   *
   * @param {string|Array|Object} _input data to serialise.
   * @param {Object} [_config] recognised keys: `delimiter`, `quotes`,
   *   `skipEmptyLines`, `newline`, `quoteChar`, `header`, `columns`,
   *   `escapeChar`, `escapeFormulae`. Values of the wrong type are ignored.
   * @returns {string} the serialised text.
   * @throws {Error} 'Unable to serialize unrecognized input' for unsupported
   *   input, 'Option columns is empty' when `columns` is an empty array.
   */
  function JsonToCsv(_input, _config)
  {
    // Default configuration

    /** whether to surround every datum with quotes */
    var _quotes = false;

    /** whether to write headers */
    var _writeHeader = true;

    /** delimiting character(s) */
    var _delimiter = ',';

    /** newline character(s) */
    var _newline = '\r\n';

    /** quote character */
    var _quoteChar = '"';

    /** escaped quote character, either <quoteChar><quoteChar> or <config.escapeChar><quoteChar> */
    var _escapedQuote = _quoteChar + _quoteChar;

    /** whether to skip empty lines */
    var _skipEmptyLines = false;

    /** the columns (keys) we expect when we unparse objects */
    var _columns = null;

    /** whether to prevent outputting cells that can be parsed as formulae by spreadsheet software (Excel and LibreOffice) */
    var _escapeFormulae = false;

    unpackConfig();

    var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');

    if (typeof _input === 'string')
      _input = JSON.parse(_input);

    if (Array.isArray(_input))
    {
      if (!_input.length || Array.isArray(_input[0]))
        return serialize(null, _input, _skipEmptyLines);
      else if (typeof _input[0] === 'object')
        return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines);
    }
    else if (_input !== null && typeof _input === 'object') // typeof null === 'object'
    {
      if (typeof _input.data === 'string')
        _input.data = JSON.parse(_input.data);

      if (Array.isArray(_input.data))
      {
        if (!_input.fields)
          _input.fields = _input.meta && _input.meta.fields || _columns;

        if (!_input.fields)
          _input.fields = Array.isArray(_input.data[0])
            ? _input.fields
            : typeof _input.data[0] === 'object'
              ? Object.keys(_input.data[0])
              : [];

        if (!(Array.isArray(_input.data[0])) && typeof _input.data[0] !== 'object')
          _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
      }

      return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
    }

    // Default (any valid paths should return before this)
    throw new Error('Unable to serialize unrecognized input');


    /** Copies the recognised, well-typed options from `_config` over the defaults above. */
    function unpackConfig()
    {
      // typeof null === 'object', so exclude null explicitly.
      if (_config === null || typeof _config !== 'object')
        return;

      if (typeof _config.delimiter === 'string'
        && !Papa.BAD_DELIMITERS.some(function(value) { return _config.delimiter.indexOf(value) !== -1; }))
      {
        _delimiter = _config.delimiter;
      }

      if (typeof _config.quotes === 'boolean'
        || typeof _config.quotes === 'function'
        || Array.isArray(_config.quotes))
        _quotes = _config.quotes;

      if (typeof _config.skipEmptyLines === 'boolean'
        || typeof _config.skipEmptyLines === 'string')
        _skipEmptyLines = _config.skipEmptyLines;

      if (typeof _config.newline === 'string')
        _newline = _config.newline;

      if (typeof _config.quoteChar === 'string') {
        _quoteChar = _config.quoteChar;
        // Keep the default escape sequence in step with the custom quote
        // character; an explicit escapeChar below still overrides it.
        _escapedQuote = _quoteChar + _quoteChar;
      }

      if (typeof _config.header === 'boolean')
        _writeHeader = _config.header;

      if (Array.isArray(_config.columns)) {

        if (_config.columns.length === 0) throw new Error('Option columns is empty');

        _columns = _config.columns;
      }

      if (_config.escapeChar !== undefined) {
        _escapedQuote = _config.escapeChar + _quoteChar;
      }

      if (typeof _config.escapeFormulae === 'boolean' || _config.escapeFormulae instanceof RegExp) {
        _escapeFormulae = _config.escapeFormulae instanceof RegExp ? _config.escapeFormulae : /^[=+\-@\t\r].*$/;
      }
    }

    /** The double for loop that iterates the data and writes out a CSV string including header row */
    function serialize(fields, data, skipEmptyLines)
    {
      var csv = '';

      if (typeof fields === 'string')
        fields = JSON.parse(fields);
      if (typeof data === 'string')
        data = JSON.parse(data);

      var hasHeader = Array.isArray(fields) && fields.length > 0;
      var dataKeyedByField = !(Array.isArray(data[0]));

      // If there is a header row, write it first
      if (hasHeader && _writeHeader)
      {
        for (var i = 0; i < fields.length; i++)
        {
          if (i > 0)
            csv += _delimiter;
          csv += safe(fields[i], i);
        }
        if (data.length > 0)
          csv += _newline;
      }

      // Then write out the data
      for (var row = 0; row < data.length; row++)
      {
        var maxCol = hasHeader ? fields.length : data[row].length;

        var emptyLine = false;
        var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
        if (skipEmptyLines && !hasHeader)
        {
          emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
        }
        if (skipEmptyLines === 'greedy' && hasHeader) {
          var line = [];
          for (var c = 0; c < maxCol; c++) {
            var cx = dataKeyedByField ? fields[c] : c;
            line.push(data[row][cx]);
          }
          emptyLine = line.join('').trim() === '';
        }
        if (!emptyLine)
        {
          for (var col = 0; col < maxCol; col++)
          {
            if (col > 0 && !nullLine)
              csv += _delimiter;
            var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
            csv += safe(data[row][colIdx], col);
          }
          if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine)))
          {
            csv += _newline;
          }
        }
      }
      return csv;
    }

    /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
    function safe(str, col)
    {
      if (typeof str === 'undefined' || str === null)
        return '';

      if (str.constructor === Date)
        return JSON.stringify(str).slice(1, 25);

      var needsQuotes = false;

      if (_escapeFormulae && typeof str === "string" && _escapeFormulae.test(str)) {
        str = "'" + str;
        needsQuotes = true;
      }

      var escapedQuoteStr = str.toString().replace(quoteCharRegex, _escapedQuote);

      needsQuotes = needsQuotes
        || _quotes === true
        || (typeof _quotes === 'function' && _quotes(str, col))
        || (Array.isArray(_quotes) && _quotes[col])
        || hasAny(escapedQuoteStr, Papa.BAD_DELIMITERS)
        || escapedQuoteStr.indexOf(_delimiter) > -1
        // BAD_DELIMITERS only covers the default '"'; a value containing a
        // custom quote character must be quoted as well.
        || (_quoteChar !== '' && escapedQuoteStr.indexOf(_quoteChar) > -1)
        || escapedQuoteStr.charAt(0) === ' '
        || escapedQuoteStr.charAt(escapedQuoteStr.length - 1) === ' ';

      return needsQuotes ? _quoteChar + escapedQuoteStr + _quoteChar : escapedQuoteStr;
    }

    /** Returns true when `str` contains at least one of `substrings`. */
    function hasAny(str, substrings)
    {
      for (var i = 0; i < substrings.length; i++)
        if (str.indexOf(substrings[i]) > -1)
          return true;
      return false;
    }
  }
  /**
   * ChunkStreamer is the base prototype for various streamer implementations.
   *
   * It owns a ParserHandle built from a copy of `config` and provides
   * `parseChunk`, which feeds one chunk of text to that handle, forwards the
   * results to the configured callbacks (or posts them from a Papa worker) and
   * asks the concrete streamer for the next chunk through `this._nextChunk`.
   * Concrete streamers are expected to assign `_nextChunk` and to maintain
   * `_finished` and `_input`.
   *
   * @param {Object} config parse configuration; copied, not retained.
   */
  function ChunkStreamer(config)
  {
    this._handle = null;
    this._finished = false;
    this._completed = false;
    this._halted = false;
    this._input = null;
    this._baseIndex = 0;
    this._partialLine = '';
    this._rowCount = 0;
    this._start = 0;
    this._nextChunk = null;
    this.isFirstChunk = true;
    this._completeResults = {
      data: [],
      errors: [],
      meta: {}
    };
    replaceConfig.call(this, config);

    /**
     * Parses one chunk and dispatches its results.
     *
     * @param {string} chunk text to parse; any partial line left over from the
     *   previous chunk is prepended to it.
     * @param {boolean} [isFakeChunk] when truthy the user's `chunk` callback
     *   is not invoked for this call.
     * @returns {Object|undefined} the handle's results; undefined when parsing
     *   was paused/aborted or after the results were given to a `chunk` callback.
     */
    this.parseChunk = function(chunk, isFakeChunk)
    {
      // First chunk pre-processing
      if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk))
      {
        var modifiedChunk = this._config.beforeFirstChunk(chunk);
        if (modifiedChunk !== undefined)
          chunk = modifiedChunk;
      }
      this.isFirstChunk = false;
      this._halted = false;

      // Rejoin the line we likely just split in two by chunking the file
      var aggregate = this._partialLine + chunk;
      this._partialLine = '';

      var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

      if (this._handle.paused() || this._handle.aborted()) {
        this._halted = true;
        return;
      }

      var lastIndex = results.meta.cursor;

      if (!this._finished)
      {
        // Keep whatever follows the parser's cursor for the next chunk.
        this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
        this._baseIndex = lastIndex;
      }

      if (results && results.data)
        this._rowCount += results.data.length;

      var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);

      if (IS_PAPA_WORKER)
      {
        global.postMessage({
          results: results,
          workerId: Papa.WORKER_ID,
          finished: finishedIncludingPreview
        });
      }
      else if (isFunction(this._config.chunk) && !isFakeChunk)
      {
        this._config.chunk(results, this._handle);
        // The callback may have paused or aborted the parser.
        if (this._handle.paused() || this._handle.aborted()) {
          this._halted = true;
          return;
        }
        // Results were handed to the callback; do not accumulate them.
        results = undefined;
        this._completeResults = undefined;
      }

      // Only accumulate when the caller is not streaming via step/chunk.
      if (!this._config.step && !this._config.chunk) {
        this._completeResults.data = this._completeResults.data.concat(results.data);
        this._completeResults.errors = this._completeResults.errors.concat(results.errors);
        this._completeResults.meta = results.meta;
      }

      if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
        this._config.complete(this._completeResults, this._input);
        this._completed = true;
      }

      if (!finishedIncludingPreview && (!results || !results.meta.paused))
        this._nextChunk();

      return results;
    };

    /**
     * Reports an error: calls the `error` callback when it is a function;
     * otherwise, inside a Papa worker with a truthy `error` setting, posts the
     * error to the owning thread.
     */
    this._sendError = function(error)
    {
      if (isFunction(this._config.error))
        this._config.error(error);
      else if (IS_PAPA_WORKER && this._config.error)
      {
        global.postMessage({
          workerId: Papa.WORKER_ID,
          error: error,
          finished: false
        });
      }
    };

    /** Copies `config`, normalises chunkSize and creates the ParserHandle for this streamer. */
    function replaceConfig(config)
    {
      // Deep-copy the config so we can edit it
      var configCopy = copy(config);
      // parseInt VERY important so we don't concatenate strings! An explicit
      // radix keeps the conversion decimal regardless of the string's prefix.
      configCopy.chunkSize = parseInt(configCopy.chunkSize, 10);
      if (!config.step && !config.chunk)
        configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
      this._handle = new ParserHandle(configCopy);
      this._handle.streamer = this;
      this._config = configCopy; // persist the copy to the caller
    }
  }
  /**
   * Streamer that downloads its input with XMLHttpRequest, optionally in
   * byte ranges of `config.chunkSize` (defaulting to Papa.RemoteChunkSize).
   *
   * In a worker (IS_WORKER) requests are opened synchronously and the
   * response is handled right after `_readChunk`; otherwise they are
   * asynchronous and handled from the xhr `onload`/`onerror` callbacks.
   *
   * @param {Object} [config] parse configuration. Keys read here:
   *   `chunkSize`, `withCredentials`, `downloadRequestBody` (switches the
   *   request to POST), `downloadRequestHeaders`.
   */
  function NetworkStreamer(config)
  {
    config = config || {};
    if (!config.chunkSize)
      config.chunkSize = Papa.RemoteChunkSize;
    ChunkStreamer.call(this, config);

    var xhr;

    if (IS_WORKER)
    {
      this._nextChunk = function()
      {
        this._readChunk();
        this._chunkLoaded();
      };
    }
    else
    {
      this._nextChunk = function()
      {
        this._readChunk();
      };
    }

    /** Starts downloading and parsing `url`. */
    this.stream = function(url)
    {
      this._input = url;
      this._nextChunk(); // Starts streaming
    };

    /** Issues the request for the next chunk (or the whole resource when chunkSize is falsy). */
    this._readChunk = function()
    {
      if (this._finished)
      {
        this._chunkLoaded();
        return;
      }

      xhr = new XMLHttpRequest();

      if (this._config.withCredentials)
      {
        xhr.withCredentials = this._config.withCredentials;
      }

      if (!IS_WORKER)
      {
        xhr.onload = bindFunction(this._chunkLoaded, this);
        xhr.onerror = bindFunction(this._chunkError, this);
      }

      xhr.open(this._config.downloadRequestBody ? 'POST' : 'GET', this._input, !IS_WORKER);
      // Headers can only be set once the request state is OPENED
      if (this._config.downloadRequestHeaders)
      {
        var headers = this._config.downloadRequestHeaders;

        for (var headerName in headers)
        {
          xhr.setRequestHeader(headerName, headers[headerName]);
        }
      }

      if (this._config.chunkSize)
      {
        var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive
        xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + end);
      }

      try {
        xhr.send(this._config.downloadRequestBody);
      }
      catch (err) {
        this._chunkError(err.message);
      }

      if (IS_WORKER && xhr.status === 0)
        this._chunkError();
    };

    /** Handles a completed request: validates the status, advances the offset and parses the text. */
    this._chunkLoaded = function()
    {
      if (xhr.readyState !== 4)
        return;

      if (xhr.status < 200 || xhr.status >= 400)
      {
        this._chunkError();
        return;
      }

      // Advance by chunkSize rather than the response length: the two can
      // differ when the text contains characters of more than one byte.
      this._start += this._config.chunkSize ? this._config.chunkSize : xhr.responseText.length;
      this._finished = !this._config.chunkSize || this._start >= getFileSize(xhr);
      this.parseChunk(xhr.responseText);
    };

    /** Reports a failed request, preferring the xhr status text over `errorMessage`. */
    this._chunkError = function(errorMessage)
    {
      var errorText = xhr.statusText || errorMessage;
      this._sendError(new Error(errorText));
    };

    /**
     * Reads the total size from the part of the Content-Range response
     * header after the last '/'. Returns -1 when the header is absent, which
     * makes the caller treat the download as finished.
     */
    function getFileSize(xhr)
    {
      var contentRange = xhr.getResponseHeader('Content-Range');
      if (contentRange === null) { // no content range, then finish!
        return -1;
      }
      return parseInt(contentRange.substring(contentRange.lastIndexOf('/') + 1), 10);
    }
  }
  NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
  NetworkStreamer.prototype.constructor = NetworkStreamer;
  /**
   * Streamer that reads a File/Blob-like input in slices of
   * `config.chunkSize` (Papa.LocalChunkSize when not given) and feeds the
   * decoded text to `parseChunk`.
   *
   * @param {Object} [config] parse configuration; `chunkSize`, `preview` and
   *   `encoding` are read here.
   */
  function FileStreamer(config)
  {
    config = config || {};
    if (!config.chunkSize) {
      config.chunkSize = Papa.LocalChunkSize;
    }
    ChunkStreamer.call(this, config);

    var fileReader;
    var sliceFn;

    // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
    // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
    // typeof check because Safari doesn't consider it a function - see issue #105
    var hasAsyncReader = typeof FileReader !== 'undefined';

    /** Remembers the file, prepares a reader and starts reading. */
    this.stream = function(file)
    {
      this._input = file;
      sliceFn = file.slice || file.webkitSlice || file.mozSlice;

      if (!hasAsyncReader) {
        // Hack for running in a web worker in Firefox
        fileReader = new FileReaderSync();
      } else {
        // Preferred method of reading files, even in workers
        fileReader = new FileReader();
        fileReader.onload = bindFunction(this._chunkLoaded, this);
        fileReader.onerror = bindFunction(this._chunkError, this);
      }

      this._nextChunk(); // Starts streaming
    };

    /** Reads another chunk unless the file is finished or the preview row limit was reached. */
    this._nextChunk = function()
    {
      if (this._finished) {
        return;
      }
      if (this._config.preview && !(this._rowCount < this._config.preview)) {
        return;
      }
      this._readChunk();
    };

    /** Reads the next slice (or the whole input when chunkSize is falsy) as text. */
    this._readChunk = function()
    {
      var source = this._input;
      var chunkSize = this._config.chunkSize;
      if (chunkSize)
      {
        var sliceEnd = Math.min(this._start + chunkSize, this._input.size);
        source = sliceFn.call(source, this._start, sliceEnd);
      }

      var text = fileReader.readAsText(source, this._config.encoding);
      if (hasAsyncReader) {
        return;
      }
      // The synchronous reader returns the text directly; mimic the async signature
      this._chunkLoaded({ target: { result: text } });
    };

    /** Advances the read offset, updates the finished flag and parses the loaded text. */
    this._chunkLoaded = function(event)
    {
      // Very important to increment start each time before handling results
      this._start += this._config.chunkSize;
      this._finished = !this._config.chunkSize || this._start >= this._input.size;
      this.parseChunk(event.target.result);
    };

    /** Forwards the reader's error to the error handler. */
    this._chunkError = function()
    {
      this._sendError(fileReader.error);
    };
  }
  FileStreamer.prototype = Object.create(ChunkStreamer.prototype);
  FileStreamer.prototype.constructor = FileStreamer;
  /**
   * Streamer for input that is already a string. The string is consumed in
   * pieces of `config.chunkSize` characters, or in one piece when chunkSize
   * is falsy.
   *
   * @param {Object} [config] parse configuration.
   */
  function StringStreamer(config)
  {
    config = config || {};
    ChunkStreamer.call(this, config);

    var remaining;

    /**
     * Starts parsing the string `s`.
     * @returns {*} whatever `parseChunk` returns for the first chunk.
     */
    this.stream = function(s)
    {
      remaining = s;
      return this._nextChunk();
    };

    /** Takes the next piece off the remaining text and parses it; does nothing once finished. */
    this._nextChunk = function()
    {
      if (this._finished) return;
      var size = this._config.chunkSize;
      var chunk;
      if (size) {
        chunk = remaining.substring(0, size);
        remaining = remaining.substring(size);
      } else {
        chunk = remaining;
        remaining = '';
      }
      // Finished once nothing is left after this chunk.
      this._finished = !remaining;
      return this.parseChunk(chunk);
    };
  }
  // Inherit from ChunkStreamer like the other streamers do (the prototype was
  // previously derived from StringStreamer's own prototype).
  StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
  StringStreamer.prototype.constructor = StringStreamer;
  /**
   * Streamer for an input object exposing `on`, `removeListener`, `pause`
   * and `resume` (a Node-style readable stream). Incoming 'data' chunks are
   * queued and parsed one at a time; a final empty chunk is queued when the
   * stream emits 'end'.
   *
   * @param {Object} [config] parse configuration; `encoding` is used to
   *   convert non-string chunks via `chunk.toString(encoding)`.
   */
  function ReadableStreamStreamer(config)
  {
    config = config || {};

    ChunkStreamer.call(this, config);

    var queue = [];
    var parseOnData = true;
    var streamHasEnded = false;

    /** Pauses the underlying input stream. */
    this.pause = function()
    {
      // No prototype-level pause is defined on ChunkStreamer in the code
      // visible here; only delegate when one exists so that calling pause()
      // cannot fail before the stream itself is paused.
      if (typeof ChunkStreamer.prototype.pause === 'function')
        ChunkStreamer.prototype.pause.apply(this, arguments);
      this._input.pause();
    };

    /** Resumes the underlying input stream. */
    this.resume = function()
    {
      // Same guard as in pause().
      if (typeof ChunkStreamer.prototype.resume === 'function')
        ChunkStreamer.prototype.resume.apply(this, arguments);
      this._input.resume();
    };

    /** Attaches the data/end/error listeners to `stream`. */
    this.stream = function(stream)
    {
      this._input = stream;

      this._input.on('data', this._streamData);
      this._input.on('end', this._streamEnd);
      this._input.on('error', this._streamError);
    };

    /** Marks the streamer finished when the stream has ended and exactly one chunk is left in the queue. */
    this._checkIsFinished = function()
    {
      if (streamHasEnded && queue.length === 1) {
        this._finished = true;
      }
    };

    /** Parses the next queued chunk, or arranges for the next 'data' event to be parsed immediately. */
    this._nextChunk = function()
    {
      this._checkIsFinished();
      if (queue.length)
      {
        this.parseChunk(queue.shift());
      }
      else
      {
        parseOnData = true;
      }
    };

    /** 'data' listener: queues the chunk as a string and parses it right away when the parser is idle. */
    this._streamData = bindFunction(function(chunk)
    {
      try
      {
        queue.push(typeof chunk === 'string' ? chunk : chunk.toString(this._config.encoding));

        if (parseOnData)
        {
          parseOnData = false;
          this._checkIsFinished();
          this.parseChunk(queue.shift());
        }
      }
      catch (error)
      {
        this._streamError(error);
      }
    }, this);

    /** 'error' listener: detaches the listeners and reports the error. */
    this._streamError = bindFunction(function(error)
    {
      this._streamCleanUp();
      this._sendError(error);
    }, this);

    /** 'end' listener: detaches the listeners and queues a final empty chunk. */
    this._streamEnd = bindFunction(function()
    {
      this._streamCleanUp();
      streamHasEnded = true;
      this._streamData('');
    }, this);

    /** Removes the three listeners registered by stream(). */
    this._streamCleanUp = bindFunction(function()
    {
      this._input.removeListener('data', this._streamData);
      this._input.removeListener('end', this._streamEnd);
      this._input.removeListener('error', this._streamError);
    }, this);
  }
  ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
  ReadableStreamStreamer.prototype.constructor = ReadableStreamStreamer;
  /**
   * Streamer exposed as a Node Duplex stream: CSV text is written into the
   * stream and parsed rows are read from it (the readable side is in object
   * mode). Retrieve the stream with getStream().
   *
   * The config's step and complete callbacks are replaced by internal ones that
   * push rows into the stream and end it.
   *
   * @param {Object} _config Parser configuration; it is copied, not mutated.
   */
  function DuplexStreamStreamer(_config)
  {
    var Duplex = require('stream').Duplex;
    var config = copy(_config);
    var idle = true;           // true while no queued write is waiting to be parsed
    var writeSideDone = false; // set once the writable side emitted 'finish'
    var parseJobs = [];        // queued closures, one per written chunk
    var stream = null;

    /** Step callback: forwards the parsed row and pauses parsing on back-pressure. */
    this._onCsvData = function(results)
    {
      var accepted = stream.push(results.data);
      if (accepted || this._handle.paused())
        return;
      // the writeable consumer buffer has filled up
      // so we need to pause until more items
      // can be processed
      this._handle.pause();
    };

    /** Complete callback: ends the readable side. */
    this._onCsvComplete = function()
    {
      // node will finish the read stream when
      // null is pushed
      stream.push(null);
    };

    config.step = bindFunction(this._onCsvData, this);
    config.complete = bindFunction(this._onCsvComplete, this);
    ChunkStreamer.call(this, config);

    /** Runs the oldest queued parse job, or marks the streamer idle when none is queued. */
    this._nextChunk = function()
    {
      if (writeSideDone && parseJobs.length === 1)
        this._finished = true;

      if (parseJobs.length === 0)
      {
        idle = true;
        return;
      }
      var job = parseJobs.shift();
      job();
    };

    /** Queues a chunk for parsing; callback (if a function) is invoked after it was parsed. */
    this._addToParseQueue = function(chunk, callback)
    {
      // add to queue so that we can indicate
      // completion via callback
      // node will automatically pause the incoming stream
      // when too many items have been added without their
      // callback being invoked
      var job = bindFunction(function()
      {
        var text = typeof chunk === 'string' ? chunk : chunk.toString(config.encoding);
        this.parseChunk(text);
        if (isFunction(callback))
          return callback();
      }, this);
      parseJobs.push(job);

      if (!idle)
        return;
      idle = false;
      this._nextChunk();
    };

    /** Duplex read hook: resumes parsing if it had been paused. */
    this._onRead = function()
    {
      if (!this._handle.paused())
        return;
      // the writeable consumer can handle more data
      // so resume the chunk parsing
      this._handle.resume();
    };

    /** Duplex write hook: queues the written chunk. */
    this._onWrite = function(chunk, encoding, callback)
    {
      this._addToParseQueue(chunk, callback);
    };

    /** 'finish' handler of the writable side. */
    this._onWriteComplete = function()
    {
      writeSideDone = true;
      // have to write empty string
      // so parser knows its done
      this._addToParseQueue('');
    };

    /** Returns the Duplex stream backing this streamer. */
    this.getStream = function()
    {
      return stream;
    };

    stream = new Duplex({
      readableObjectMode: true,
      decodeStrings: false,
      read: bindFunction(this._onRead, this),
      write: bindFunction(this._onWrite, this)
    });
    stream.once('finish', bindFunction(this._onWriteComplete, this));
  }
  if (typeof PAPA_BROWSER_CONTEXT === 'undefined')
  {
    DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
    DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer;
  }
  /**
   * Drives one core Parser over a whole input. Use one ParserHandle per entire
   * CSV file or string.
   *
   * On top of the raw rows returned by Parser it applies, depending on _config:
   * newline and delimiter auto-detection, empty-line skipping, header mapping,
   * value transformation and dynamic typing. It also offers pause, resume and
   * abort.
   *
   * @param {Object} _config Parsing configuration. It is mutated: detected
   *   newline/delimiter values are written back and a user-supplied step
   *   callback is replaced by a wrapper.
   */
  function ParserHandle(_config)
  {
    // One goal is to minimize the use of regular expressions...
    var MAX_FLOAT = Math.pow(2, 53);
    var MIN_FLOAT = -MAX_FLOAT;
    var FLOAT = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/;
    // Three accepted timestamp shapes (with fractional seconds, with seconds,
    // without seconds). They are wrapped in ONE group so that ^ and $ apply to
    // every alternative; without the group a value that merely contained or
    // ended with a timestamp was turned into an Invalid Date.
    var ISO_DATE = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/;
    var self = this;
    var _stepCounter = 0;   // Number of times step was called (number of rows parsed)
    var _rowCounter = 0;    // Number of rows that have been parsed so far
    var _input;             // The input being parsed
    var _parser;            // The core parser being used
    var _paused = false;    // Whether we are paused or not
    var _aborted = false;   // Whether the parser has aborted or not
    var _delimiterError;    // Temporary state between delimiter detection and processing results
    var _fields = [];       // Fields are from the header row of the input, if there is one
    var _results = {        // The last results returned from the parser
      data: [],
      errors: [],
      meta: {}
    };

    if (isFunction(_config.step))
    {
      // Wrap the user's step callback so that each row is post-processed first.
      var userStep = _config.step;
      _config.step = function(results)
      {
        _results = results;

        if (needsHeaderRow())
          processResults();
        else // only call user's step function after header row
        {
          processResults();

          // It's possible that this line was empty and there's no row here after all
          if (_results.data.length === 0)
            return;

          _stepCounter += results.data.length;
          if (_config.preview && _stepCounter > _config.preview)
            _parser.abort();
          else {
            _results.data = _results.data[0];
            userStep(_results, self);
          }
        }
      };
    }

    /**
     * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
     * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
     * when an input comes in multiple chunks, like from a file.
     *
     * @param {string} input Text to parse.
     * @param {number} [baseIndex] Passed through to Parser.parse.
     * @param {boolean} [ignoreLastRow] Passed through to Parser.parse.
     * @returns {Object} { meta: { paused: true } } while paused, otherwise the
     *   processed results (or { meta: { paused: false } } if there are none).
     */
    this.parse = function(input, baseIndex, ignoreLastRow)
    {
      var quoteChar = _config.quoteChar || '"';
      if (!_config.newline)
        _config.newline = guessLineEndings(input, quoteChar);

      _delimiterError = false;
      if (!_config.delimiter)
      {
        var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
        if (delimGuess.successful)
          _config.delimiter = delimGuess.bestDelimiter;
        else
        {
          _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
          _config.delimiter = Papa.DefaultDelimiter;
        }
        _results.meta.delimiter = _config.delimiter;
      }
      else if (isFunction(_config.delimiter))
      {
        _config.delimiter = _config.delimiter(input);
        _results.meta.delimiter = _config.delimiter;
      }

      var parserConfig = copy(_config);
      if (_config.preview && _config.header)
        parserConfig.preview++; // to compensate for header row

      _input = input;
      _parser = new Parser(parserConfig);
      _results = _parser.parse(_input, baseIndex, ignoreLastRow);
      processResults();
      return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
    };

    /** @returns {boolean} Whether parsing is currently paused. */
    this.paused = function()
    {
      return _paused;
    };

    /** Pauses parsing by aborting the core parser and remembering the unparsed input. */
    this.pause = function()
    {
      _paused = true;
      _parser.abort();

      // If it is streaming via "chunking", the reader will start appending correctly already so no need to substring,
      // otherwise we can get duplicate content within a row
      _input = isFunction(_config.chunk) ? "" : _input.substring(_parser.getCharIndex());
    };

    /**
     * Resumes parsing through self.streamer once that streamer reports it has
     * halted; until then it re-schedules itself.
     * NOTE(review): self.streamer is not assigned in this function; presumably
     * the owning streamer attaches it - confirm.
     */
    this.resume = function()
    {
      if (self.streamer._halted) {
        _paused = false;
        self.streamer.parseChunk(_input, true);
      } else {
        // Bugfix: #636 In case the processing hasn't halted yet
        // wait for it to halt in order to resume
        setTimeout(self.resume, 3);
      }
    };

    /** @returns {boolean} Whether abort() has been called. */
    this.aborted = function()
    {
      return _aborted;
    };

    /** Aborts parsing, marks the results as aborted and calls the complete callback, if any. */
    this.abort = function()
    {
      _aborted = true;
      _parser.abort();
      _results.meta.aborted = true;
      if (isFunction(_config.complete))
        _config.complete(_results);
      _input = '';
    };

    /**
     * Tells whether a parsed row counts as empty. In 'greedy' mode a row is
     * empty when all of its fields joined together are blank; otherwise only a
     * row consisting of one zero-length field is empty.
     */
    function testEmptyLine(s) {
      return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
    }

    /** Tells whether s looks like a number whose value lies strictly between -2^53 and 2^53. */
    function testFloat(s) {
      if (FLOAT.test(s)) {
        var floatValue = parseFloat(s);
        if (floatValue > MIN_FLOAT && floatValue < MAX_FLOAT) {
          return true;
        }
      }
      return false;
    }

    /** Post-processes _results: delimiter error, empty-line filter, header row, typing/transform. */
    function processResults()
    {
      if (_results && _delimiterError)
      {
        addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \'' + Papa.DefaultDelimiter + '\'');
        _delimiterError = false;
      }

      if (_config.skipEmptyLines)
      {
        _results.data = _results.data.filter(function(d) {
          return !testEmptyLine(d);
        });
      }

      if (needsHeaderRow())
        fillHeaderFields();

      return applyHeaderAndDynamicTypingAndTransformation();
    }

    /** True while header mode is on and no header fields have been collected yet. */
    function needsHeaderRow()
    {
      return _config.header && _fields.length === 0;
    }

    /** Takes the header row out of _results and stores its (optionally transformed) names in _fields. */
    function fillHeaderFields()
    {
      if (!_results)
        return;

      function addHeader(header, i)
      {
        if (isFunction(_config.transformHeader))
          header = _config.transformHeader(header, i);

        _fields.push(header);
      }

      if (Array.isArray(_results.data[0]))
      {
        // The loop stops as soon as some row has contributed at least one field.
        for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
          _results.data[i].forEach(addHeader);

        _results.data.splice(0, 1);
      }
      // if _results.data[0] is not an array, we are in a step where _results.data is the row.
      else
        _results.data.forEach(addHeader);
    }

    /** Decides whether dynamic typing applies to the given field (name or column index). */
    function shouldApplyDynamicTyping(field) {
      // Cache function values to avoid calling it for each row
      if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
        _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
      }
      return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
    }

    /**
     * Converts value to a boolean, number, Date or null when dynamic typing
     * applies to the field and the text has the matching form; otherwise the
     * value is returned unchanged.
     */
    function parseDynamic(field, value)
    {
      if (shouldApplyDynamicTyping(field))
      {
        if (value === 'true' || value === 'TRUE')
          return true;
        else if (value === 'false' || value === 'FALSE')
          return false;
        else if (testFloat(value))
          return parseFloat(value);
        else if (ISO_DATE.test(value))
          return new Date(value);
        else
          return (value === '' ? null : value);
      }
      return value;
    }

    /**
     * Rewrites the rows in _results: with header mode each row becomes an
     * object keyed by field name (surplus values are collected under
     * '__parsed_extra'), and every value goes through the transform callback
     * and dynamic typing. Field-count mismatches are recorded as errors.
     */
    function applyHeaderAndDynamicTypingAndTransformation()
    {
      if (!_results || (!_config.header && !_config.dynamicTyping && !_config.transform))
        return _results;

      function processRow(rowSource, i)
      {
        var row = _config.header ? {} : [];

        var j;
        for (j = 0; j < rowSource.length; j++)
        {
          var field = j;
          var value = rowSource[j];

          if (_config.header)
            field = j >= _fields.length ? '__parsed_extra' : _fields[j];

          if (_config.transform)
            value = _config.transform(value, field);

          value = parseDynamic(field, value);

          if (field === '__parsed_extra')
          {
            row[field] = row[field] || [];
            row[field].push(value);
          }
          else
            row[field] = value;
        }

        if (_config.header)
        {
          if (j > _fields.length)
            addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
          else if (j < _fields.length)
            addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
        }

        return row;
      }

      var incrementBy = 1;
      if (!_results.data.length || Array.isArray(_results.data[0]))
      {
        _results.data = _results.data.map(processRow);
        incrementBy = _results.data.length;
      }
      else
        _results.data = processRow(_results.data, 0); // step mode: data is a single row

      if (_config.header && _results.meta)
        _results.meta.fields = _fields;

      _rowCounter += incrementBy;
      return _results;
    }

    /**
     * Tries every candidate delimiter on a preview of up to 10 rows and keeps
     * the one whose rows vary least in field count while averaging more fields
     * than the best candidate so far (and more than 1.99).
     *
     * @returns {{successful: boolean, bestDelimiter: (string|undefined)}}
     */
    function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
      var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount;

      delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];

      for (var i = 0; i < delimitersToGuess.length; i++) {
        var delim = delimitersToGuess[i];
        var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
        fieldCountPrevRow = undefined;

        var preview = new Parser({
          comments: comments,
          delimiter: delim,
          newline: newline,
          preview: 10
        }).parse(input);

        for (var j = 0; j < preview.data.length; j++) {
          if (skipEmptyLines && testEmptyLine(preview.data[j])) {
            emptyLinesCount++;
            continue;
          }
          var fieldCount = preview.data[j].length;
          avgFieldCount += fieldCount;

          if (typeof fieldCountPrevRow === 'undefined') {
            fieldCountPrevRow = fieldCount;
            continue;
          }
          else if (fieldCount > 0) {
            delta += Math.abs(fieldCount - fieldCountPrevRow);
            fieldCountPrevRow = fieldCount;
          }
        }

        if (preview.data.length > 0)
          avgFieldCount /= (preview.data.length - emptyLinesCount);

        if ((typeof bestDelta === 'undefined' || delta <= bestDelta)
          && (typeof maxFieldCount === 'undefined' || avgFieldCount > maxFieldCount) && avgFieldCount > 1.99) {
          bestDelta = delta;
          bestDelim = delim;
          maxFieldCount = avgFieldCount;
        }
      }

      _config.delimiter = bestDelim;

      return {
        successful: !!bestDelim,
        bestDelimiter: bestDelim
      };
    }

    /**
     * Guesses the line ending ('\n', '\r' or '\r\n') from at most the first
     * 1 MB of input, ignoring everything between pairs of quote characters.
     */
    function guessLineEndings(input, quoteChar)
    {
      input = input.substring(0, 1024 * 1024); // max length 1 MB
      // Replace all the text inside quotes
      var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
      input = input.replace(re, '');

      var r = input.split('\r');

      var n = input.split('\n');

      var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length);

      if (r.length === 1 || nAppearsFirst)
        return '\n';

      // Count the '\r' occurrences that are directly followed by '\n'.
      var numWithN = 0;
      for (var i = 0; i < r.length; i++)
      {
        if (r[i][0] === '\n')
          numWithN++;
      }

      return numWithN >= r.length / 2 ? '\r\n' : '\r';
    }

    /** Appends an error record (with the row number when one is given) to _results.errors. */
    function addError(type, code, msg, row)
    {
      var error = {
        type: type,
        code: code,
        message: msg
      };
      if (row !== undefined) {
        error.row = row;
      }
      _results.errors.push(error);
    }
  }
  /**
   * Returns string with every regular-expression metacharacter prefixed by a
   * backslash, so the result can be embedded literally in a RegExp source.
   * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
   */
  function escapeRegExp(string)
  {
    var metaCharacters = /[.*+?^${}()|[\]\\]/g;
    // In the replacement, $& stands for the whole matched string.
    return string.replace(metaCharacters, '\\$&');
  }
  /**
   * The core CSV parser: turns a string into rows of string fields.
   *
   * Options read from config: delimiter, newline, comments, step, preview,
   * fastMode, quoteChar and escapeChar. Invalid delimiter, comment and newline
   * values fall back to ',', false and '\n' respectively.
   *
   * @param {Object} [config]
   * @throws {Error} When the comment string equals the delimiter.
   */
  function Parser(config)
  {
    var options = config || {};
    var step = options.step;
    var preview = options.preview;
    var fastMode = options.fastMode;

    // Quote character defaults to '"'; the escape character defaults to the quote character.
    var quoteChar = (options.quoteChar === undefined || options.quoteChar === null) ? '"' : options.quoteChar;
    var escapeChar = options.escapeChar !== undefined ? options.escapeChar : quoteChar;

    // Delimiter must be valid
    var delim = options.delimiter;
    if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1)
      delim = ',';

    // Comment character must be valid
    var comments = options.comments;
    if (comments === delim)
      throw new Error('Comment character same as delimiter');
    if (comments === true)
      comments = '#';
    else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
      comments = false;

    // Newline must be valid: \r, \n, or \r\n
    var newline = options.newline;
    var newlineIsValid = newline === '\n' || newline === '\r' || newline === '\r\n';
    if (!newlineIsValid)
      newline = '\n';

    // State shared between parse(), abort() and getCharIndex()
    var cursor = 0;
    var aborted = false;

    /**
     * Parses input into rows.
     *
     * @param {string} input Text to parse.
     * @param {number} [baseIndex] Added to the reported meta.cursor.
     * @param {boolean} [ignoreLastRow] When truthy, the final (possibly
     *   incomplete) row is left out.
     * @returns {Object} { data, errors, meta }.
     * @throws {Error} When input is not a string.
     */
    this.parse = function(input, baseIndex, ignoreLastRow)
    {
      if (typeof input !== 'string')
        throw new Error('Input must be a string');

      var inputLen = input.length;
      var delimLen = delim.length;
      var newlineLen = newline.length;
      var commentsLen = comments.length;
      var hasStep = isFunction(step);

      // Establish starting state
      cursor = 0;
      var data = [];
      var errors = [];
      var row = [];
      var lastCursor = 0;

      if (!input)
        return returnable();

      // Without any quote character in the input (or when forced) plain splitting is enough.
      if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
        return parseBySplitting();

      var delimAt = input.indexOf(delim, cursor);
      var newlineAt = input.indexOf(newline, cursor);
      var escapedQuote = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
      var quoteAt = input.indexOf(quoteChar, cursor);

      // Parser loop
      while (true)
      {
        // Field has opening quote
        if (input[cursor] === quoteChar)
        {
          // Start our search for the closing quote where the cursor is
          quoteAt = cursor;

          // Skip the opening quote
          cursor++;

          while (true)
          {
            // Find closing quote
            quoteAt = input.indexOf(quoteChar, quoteAt + 1);

            // No further quote at all: the field is unterminated
            if (quoteAt === -1)
            {
              if (!ignoreLastRow)
              {
                errors.push({
                  type: 'Quotes',
                  code: 'MissingQuotes',
                  message: 'Quoted field unterminated',
                  row: data.length, // row has yet to be inserted
                  index: cursor
                });
              }
              return finish();
            }

            // Closing quote at EOF
            if (quoteAt === inputLen - 1)
              return finish(unescapeField(cursor, quoteAt));

            // An escaped quote is part of the data; skip it
            if (quoteChar === escapeChar)
            {
              // quote doubles as escape: escaped when the next character is one too
              if (input[quoteAt + 1] === escapeChar)
              {
                quoteAt++;
                continue;
              }
            }
            else if (quoteAt !== 0 && input[quoteAt - 1] === escapeChar)
            {
              // separate escape character: escaped when the previous character is it
              continue;
            }

            var afterQuote = quoteAt + 1;
            if (delimAt !== -1 && delimAt < afterQuote)
              delimAt = input.indexOf(delim, afterQuote);
            if (newlineAt !== -1 && newlineAt < afterQuote)
              newlineAt = input.indexOf(newline, afterQuote);

            // Check up to the next delimiter or newline, whichever is closest
            var checkUpTo = newlineAt === -1 ? delimAt : Math.min(delimAt, newlineAt);
            var delimStart = afterQuote + extraSpaces(checkUpTo);

            // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
            if (input.substr(delimStart, delimLen) === delim)
            {
              row.push(unescapeField(cursor, quoteAt));
              cursor = delimStart + delimLen;

              // If the next field does not open with a quote, look up the next quote position
              if (input[cursor] !== quoteChar)
                quoteAt = input.indexOf(quoteChar, cursor);

              delimAt = input.indexOf(delim, cursor);
              newlineAt = input.indexOf(newline, cursor);
              break;
            }

            var newlineStart = afterQuote + extraSpaces(newlineAt);

            // Closing quote followed by newline or 'unnecessary spaces + newLine'
            if (input.substring(newlineStart, newlineStart + newlineLen) === newline)
            {
              row.push(unescapeField(cursor, quoteAt));
              saveRow(newlineStart + newlineLen);
              delimAt = input.indexOf(delim, cursor);     // because we may have skipped the delimiter in the quoted field
              quoteAt = input.indexOf(quoteChar, cursor); // first quote in the next line

              if (hasStep)
              {
                doStep();
                if (aborted)
                  return returnable();
              }

              if (preview && data.length >= preview)
                return returnable(true);

              break;
            }

            // Neither escaped nor followed by EOF/delimiter/newline: record it
            // as malformed and keep treating it as part of the field
            errors.push({
              type: 'Quotes',
              code: 'InvalidQuotes',
              message: 'Trailing quote on quoted field is malformed',
              row: data.length, // row has yet to be inserted
              index: cursor
            });

            quoteAt++;
          }

          continue;
        }

        // Comment found at start of new line
        if (comments && row.length === 0 && input.substring(cursor, cursor + commentsLen) === comments)
        {
          if (newlineAt === -1) // Comment ends at EOF
            return returnable();
          cursor = newlineAt + newlineLen;
          newlineAt = input.indexOf(newline, cursor);
          delimAt = input.indexOf(delim, cursor);
          continue;
        }

        // Next delimiter comes before next newline, so we've reached end of field
        if (delimAt !== -1 && (delimAt < newlineAt || newlineAt === -1))
        {
          row.push(input.substring(cursor, delimAt));
          cursor = delimAt + delimLen;
          delimAt = input.indexOf(delim, cursor);
          continue;
        }

        // Neither delimiter nor newline ahead: the rest is the last field
        if (newlineAt === -1)
          break;

        // End of row
        row.push(input.substring(cursor, newlineAt));
        saveRow(newlineAt + newlineLen);

        if (hasStep)
        {
          doStep();
          if (aborted)
            return returnable();
        }

        if (preview && data.length >= preview)
          return returnable(true);
      }

      return finish();

      /** Fast path: splits the input on newline, then each line on the delimiter. */
      function parseBySplitting()
      {
        var lines = input.split(newline);
        var lastLine = lines.length - 1;
        for (var n = 0; n < lines.length; n++)
        {
          row = lines[n];
          cursor += row.length;
          if (n !== lastLine)
            cursor += newline.length;
          else if (ignoreLastRow)
            return returnable();
          if (comments && row.substring(0, commentsLen) === comments)
            continue;
          if (hasStep)
          {
            data = [];
            pushRow(row.split(delim));
            doStep();
            if (aborted)
              return returnable();
          }
          else
            pushRow(row.split(delim));
          if (preview && n >= preview)
          {
            data = data.slice(0, preview);
            return returnable(true);
          }
        }
        return returnable();
      }

      /** Returns input[from, to) with escaped quotes turned back into plain quote characters. */
      function unescapeField(from, to)
      {
        return input.substring(from, to).replace(escapedQuote, quoteChar);
      }

      /** Stores a finished row and remembers the cursor position reached with it. */
      function pushRow(finishedRow)
      {
        data.push(finishedRow);
        lastCursor = cursor;
      }

      /**
       * Counts the characters between the current closing quote and index when
       * that stretch is non-empty and consists of whitespace only; otherwise 0.
       */
      function extraSpaces(index)
      {
        if (index === -1)
          return 0;
        var gap = input.substring(quoteAt + 1, index);
        return (gap && gap.trim() === '') ? gap.length : 0;
      }

      /**
       * Appends value (or, when omitted, the remaining input from cursor) as
       * the last field, saves the row, calls step and returns the results.
       */
      function finish(value)
      {
        if (ignoreLastRow)
          return returnable();
        if (typeof value === 'undefined')
          value = input.substring(cursor);
        row.push(value);
        cursor = inputLen; // important in case parsing is paused
        pushRow(row);
        if (hasStep)
          doStep();
        return returnable();
      }

      /**
       * Appends the current row to the results, moves the cursor to newCursor
       * and looks up the next newline. Calling step and honouring preview is
       * left to the caller.
       */
      function saveRow(newCursor)
      {
        cursor = newCursor;
        pushRow(row);
        row = [];
        newlineAt = input.indexOf(newline, cursor);
      }

      /** Returns an object with the results, errors, and meta. */
      function returnable(stopped)
      {
        var meta = {
          delimiter: delim,
          linebreak: newline,
          aborted: aborted,
          truncated: !!stopped,
          cursor: lastCursor + (baseIndex || 0)
        };
        return { data: data, errors: errors, meta: meta };
      }

      /** Executes the user's step function and resets data & errors. */
      function doStep()
      {
        step(returnable());
        data = [];
        errors = [];
      }
    };

    /** Sets the abort flag */
    this.abort = function()
    {
      aborted = true;
    };

    /** Gets the cursor position */
    this.getCharIndex = function()
    {
      return cursor;
    };
  }
  /**
   * Creates a Worker from the worker blob URL, registers it in `workers`
   * under a fresh id and routes its messages to mainThreadReceivedMessage.
   *
   * @returns {Worker|boolean} The new worker, or false when
   *   Papa.WORKERS_SUPPORTED is falsy.
   */
  function newWorker()
  {
    if (Papa.WORKERS_SUPPORTED)
    {
      var blobUrl = getWorkerBlob();
      var worker = new global.Worker(blobUrl);
      worker.onmessage = mainThreadReceivedMessage;
      worker.id = workerIdCounter++;
      workers[worker.id] = worker;
      return worker;
    }
    return false;
  }
  /**
   * Callback when main thread receives a message from a worker.
   *
   * Dispatches the message to the callbacks stored on the worker object:
   * userError for errors, userStep once per row, or userChunk for the whole
   * result. When the message is marked finished and the user did not abort,
   * the worker is completed.
   */
  function mainThreadReceivedMessage(e)
  {
    var msg = e.data;
    var worker = workers[msg.workerId];
    var aborted = false;

    if (msg.error)
    {
      worker.userError(msg.error, msg.file);
    }
    else if (msg.results && msg.results.data)
    {
      // Handle given to the user callbacks; only abort is supported here.
      var handle = {
        abort: function() {
          aborted = true;
          completeWorker(msg.workerId, { data: [], errors: [], meta: { aborted: true } });
        },
        pause: notImplemented,
        resume: notImplemented
      };
      var payload = msg.results;

      if (isFunction(worker.userStep))
      {
        // One call per row; stop as soon as the user aborted.
        for (var rowIndex = 0; !aborted && rowIndex < payload.data.length; rowIndex++)
        {
          worker.userStep({
            data: payload.data[rowIndex],
            errors: payload.errors,
            meta: payload.meta
          }, handle);
        }
        delete msg.results; // free memory ASAP
      }
      else if (isFunction(worker.userChunk))
      {
        worker.userChunk(payload, handle, msg.file);
        delete msg.results;
      }
    }

    if (msg.finished && !aborted)
      completeWorker(msg.workerId, msg.results);
  }
  /**
   * Finishes the worker registered under workerId: calls its userComplete
   * callback (when it is a function) with results, terminates the worker and
   * removes it from the registry.
   */
  function completeWorker(workerId, results)
  {
    var finishedWorker = workers[workerId];
    var hasCompleteCallback = isFunction(finishedWorker.userComplete);
    if (hasCompleteCallback)
    {
      finishedWorker.userComplete(results);
    }
    finishedWorker.terminate();
    delete workers[workerId];
  }
  /** Placeholder for operations that are not available; it always throws. */
  function notImplemented()
  {
    var reason = 'Not implemented.';
    throw new Error(reason);
  }
  /**
   * Callback when worker thread receives a message.
   *
   * Remembers the worker id from the first message, parses msg.input with
   * msg.config and posts the results back marked as finished. String input is
   * always answered; for File/object input a message is posted only when
   * Papa.parse returned something.
   *
   * An empty message (no data) is ignored. Previously the id assignment was
   * guarded against that case but the following msg.input access was not, so
   * such a message raised a TypeError.
   */
  function workerThreadReceivedMessage(e)
  {
    var msg = e.data;

    if (!msg)
      return;

    if (typeof Papa.WORKER_ID === 'undefined')
      Papa.WORKER_ID = msg.workerId;

    if (typeof msg.input === 'string')
    {
      global.postMessage({
        workerId: Papa.WORKER_ID,
        results: Papa.parse(msg.input, msg.config),
        finished: true
      });
    }
    else if ((global.File && msg.input instanceof File) || msg.input instanceof Object) // thank you, Safari (see issue #106)
    {
      var results = Papa.parse(msg.input, msg.config);
      if (results)
        global.postMessage({
          workerId: Papa.WORKER_ID,
          results: results,
          finished: true
        });
    }
  }
  /**
   * Makes a deep copy of an array or object (mostly): arrays become new
   * arrays, every other object becomes a plain object, and each enumerable
   * property is copied recursively. Non-objects and null are returned as is.
   */
  function copy(obj)
  {
    var isContainer = typeof obj === 'object' && obj !== null;
    if (!isContainer)
      return obj;

    var duplicate = Array.isArray(obj) ? [] : {};
    for (var key in obj)
    {
      duplicate[key] = copy(obj[key]);
    }
    return duplicate;
  }
  /**
   * Returns a function that calls f with `this` fixed to self, passing on all
   * arguments and f's return value. (The return value used to be dropped, so
   * wrapped functions that return something always appeared to return
   * undefined.)
   *
   * @param {Function} f Function to wrap.
   * @param {*} self Value to use as `this` inside f.
   * @returns {Function}
   */
  function bindFunction(f, self)
  {
    return function() { return f.apply(self, arguments); };
  }
  /** Tells whether func is callable, i.e. whether `typeof func` is 'function'. */
  function isFunction(func)
  {
    var kind = typeof func;
    return kind === 'function';
  }
  1581. return Papa;
  1582. }));