From 9bae35871069549a06ee84307e2c74d8cbb8829b Mon Sep 17 00:00:00 2001 From: YfengJ <166808804+YfengJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 00:14:47 +0800 Subject: [PATCH 1/2] fix: ship csv-parse stream cjs export --- packages/csv-parse/dist/cjs/stream.cjs | 2021 +++++++++++++++++ packages/csv-parse/dist/cjs/stream.d.cts | 23 + packages/csv-parse/package.json | 2 +- packages/csv-parse/rollup.config.js | 15 + .../csv-parse/test/api.stream.cjs_export.js | 19 + 5 files changed, 2079 insertions(+), 1 deletion(-) create mode 100644 packages/csv-parse/dist/cjs/stream.cjs create mode 100644 packages/csv-parse/dist/cjs/stream.d.cts create mode 100644 packages/csv-parse/test/api.stream.cjs_export.js diff --git a/packages/csv-parse/dist/cjs/stream.cjs b/packages/csv-parse/dist/cjs/stream.cjs new file mode 100644 index 00000000..635049d6 --- /dev/null +++ b/packages/csv-parse/dist/cjs/stream.cjs @@ -0,0 +1,2021 @@ +'use strict'; + +var web = require('node:stream/web'); + +class CsvError extends Error { + constructor(code, message, options, ...contexts) { + if (Array.isArray(message)) message = message.join(" ").trim(); + super(message); + if (Error.captureStackTrace !== undefined) { + Error.captureStackTrace(this, CsvError); + } + this.code = code; + for (const context of contexts) { + for (const key in context) { + const value = context[key]; + this[key] = Buffer.isBuffer(value) + ? value.toString(options.encoding) + : value == null + ? value + : JSON.parse(JSON.stringify(value)); + } + } + } +} + +const is_object = function (obj) { + return typeof obj === "object" && obj !== null && !Array.isArray(obj); +}; + +const normalize_columns_array = function (columns) { + const normalizedColumns = []; + for (let i = 0, l = columns.length; i < l; i++) { + const column = columns[i]; + if (column === undefined || column === null || column === false) { + normalizedColumns[i] = { disabled: true }; + } else if (typeof column === "string" || typeof column === "number") { + normalizedColumns[i] = { name: `${column}` }; + } else if (is_object(column)) { + if (typeof column.name !== "string") { + throw new CsvError("CSV_OPTION_COLUMNS_MISSING_NAME", [ + "Option columns missing name:", + `property "name" is required at position ${i}`, + "when column is an object literal", + ]); + } + normalizedColumns[i] = column; + } else { + throw new CsvError("CSV_INVALID_COLUMN_DEFINITION", [ + "Invalid column definition:", + "expect a string or a literal object,", + `got ${JSON.stringify(column)} at position ${i}`, + ]); + } + } + return normalizedColumns; +}; + +class ResizeableBuffer { + constructor(size = 100) { + this.size = size; + this.length = 0; + this.buf = Buffer.allocUnsafe(size); + } + prepend(val) { + if (Buffer.isBuffer(val)) { + const length = this.length + val.length; + if (length >= this.size) { + this.resize(); + if (length >= this.size) { + throw Error("INVALID_BUFFER_STATE"); + } + } + const buf = this.buf; + this.buf = Buffer.allocUnsafe(this.size); + val.copy(this.buf, 0); + buf.copy(this.buf, val.length); + this.length += val.length; + } else { + const length = this.length++; + if (length === this.size) { + this.resize(); + } + const buf = this.clone(); + this.buf[0] = val; + buf.copy(this.buf, 1, 0, length); + } + } + append(val) { + const length = this.length++; + if (length === this.size) { + this.resize(); + } + this.buf[length] = val; + } + clone() { + return Buffer.from(this.buf.slice(0, this.length)); + } + resize() { + const length = this.length; + this.size = this.size * 2; + const buf = Buffer.allocUnsafe(this.size); + this.buf.copy(buf, 0, 0, length); + this.buf = buf; + } + toString(encoding) { + if (encoding) { + return this.buf.slice(0, this.length).toString(encoding); + } else { + return Uint8Array.prototype.slice.call(this.buf.slice(0, this.length)); + } + } + toJSON() { + return this.toString("utf8"); + } + reset() { + this.length = 0; + } +} + +const init_state = function (options) { + // ECMAScript WhiteSpace + LineTerminator codepoints, encoded under + // `options.encoding`. Aligns trimming with `String.prototype.trim()`. + // https://tc39.es/ecma262/#sec-white-space + // https://tc39.es/ecma262/#sec-line-terminators + // + // Codepoints unrepresentable in the target encoding are dropped: Node's + // Buffer substitutes them with `?` (0x3F), and including those would cause + // literal `?` bytes in the input to be trimmed under `latin1`/`ascii`. + const timchars = [ + // Basic Latin + 0x0020, // [Space](https://www.fileformat.info/info/unicode/char/0020/index.htm) + 0x0009, // [CHARACTER TABULATION (HT)](https://www.fileformat.info/info/unicode/char/0009/index.htm) + 0x000a, // [LINE FEED (LF)](https://www.fileformat.info/info/unicode/char/000a/index.htm) + 0x000d, // [CARRIAGE RETURN (CR)](https://www.fileformat.info/info/unicode/char/000d/index.htm) + 0x000c, // [FORM FEED (FF)](https://www.fileformat.info/info/unicode/char/000c/index.htm) + 0x000b, // [LINE TABULATION (VT)](https://www.fileformat.info/info/unicode/char/000b/index.htm) + // Latin-1 Supplement + 0x00a0, // [NO-BREAK SPACE (NBSP)](https://www.fileformat.info/info/unicode/char/00a0/index.htm) + // Ogham + 0x1680, // [OGHAM SPACE MARK](https://www.fileformat.info/info/unicode/char/1680/index.htm) + // General Punctuation + 0x2000, // [EN QUAD](https://www.fileformat.info/info/unicode/char/2000/index.htm) + 0x2001, // [EM QUAD](https://www.fileformat.info/info/unicode/char/2001/index.htm) + 0x2002, // [EN SPACE](https://www.fileformat.info/info/unicode/char/2002/index.htm) + 0x2003, // [EM SPACE](https://www.fileformat.info/info/unicode/char/2003/index.htm) + 0x2004, // [THREE-PER-EM SPACE](https://www.fileformat.info/info/unicode/char/2004/index.htm) + 0x2005, // [FOUR-PER-EM SPACE](https://www.fileformat.info/info/unicode/char/2005/index.htm) + 0x2006, // [SIX-PER-EM SPACE](https://www.fileformat.info/info/unicode/char/2006/index.htm) + 0x2007, // [FIGURE SPACE](https://www.fileformat.info/info/unicode/char/2007/index.htm) + 0x2008, // [PUNCTUATION SPACE](https://www.fileformat.info/info/unicode/char/2008/index.htm) + 0x2009, // [THIN SPACE](https://www.fileformat.info/info/unicode/char/2009/index.htm) + 0x200a, // [HAIR SPACE](https://www.fileformat.info/info/unicode/char/200a/index.htm) + 0x2028, // [LINE SEPARATOR](https://www.fileformat.info/info/unicode/char/2028/index.htm) + 0x2029, // [PARAGRAPH SEPARATOR](https://www.fileformat.info/info/unicode/char/2029/index.htm) + 0x202f, // [NARROW NO-BREAK SPACE (NNBSP)](https://www.fileformat.info/info/unicode/char/202f/index.htm) + 0x205f, // [MEDIUM MATHEMATICAL SPACE (MMSP)](https://www.fileformat.info/info/unicode/char/205f/index.htm) + 0x3000, // [IDEOGRAPHIC SPACE](https://www.fileformat.info/info/unicode/char/3000/index.htm) + 0xfeff, // [ZERO WIDTH NO-BREAK SPACE (BOM)](https://www.fileformat.info/info/unicode/char/feff/index.htm) + ].reduce((acc, codepoint) => { + const encoded = Buffer.from( + String.fromCharCode(codepoint), + options.encoding, + ); + if (codepoint !== 0x3f && encoded.length === 1 && encoded[0] === 0x3f) { + return acc; + } + acc.push(encoded); + return acc; + }, []); + // First-byte lookup table for `__isCharTrimable`. Non-whitespace bytes + // (the common case) bail out in O(1) without scanning every timchar. + const timcharFirstBytes = new Uint8Array(256); + for (const t of timchars) timcharFirstBytes[t[0]] = 1; + return { + bomSkipped: false, + bufBytesStart: 0, + castField: options.cast_function, + commenting: false, + delimiterBufPrevious: undefined, + delimiterDiscovered: false, + // Current error encountered by a record + error: undefined, + enabled: options.from_line === 1, + escaping: false, + escapeIsQuote: + Buffer.isBuffer(options.escape) && + Buffer.isBuffer(options.quote) && + Buffer.compare(options.escape, options.quote) === 0, + // columns can be `false`, `true`, `Array` + expectedRecordLength: Array.isArray(options.columns) + ? options.columns.length + : undefined, + field: new ResizeableBuffer(20), + firstLineToHeaders: options.cast_first_line_to_header, + needMoreDataSize: Math.max( + // Skip if the remaining buffer smaller than comment + options.comment !== null ? options.comment.length : 0, + // Skip if the remaining buffer can be delimiter + ...(options.delimiter + ? options.delimiter.map((delimiter) => delimiter.length) + : []), + // Auto discovery of delimiter is limited to 1 character + options.delimiter_auto ? 1 : 0, + // Skip if the remaining buffer can be escape sequence + options.quote !== null ? options.quote.length : 0, + // Skip if the remaining buffer can be a multi-byte trim character + ...timchars.map((t) => t.length), + ), + previousBuf: undefined, + quoting: false, + stop: false, + rawBuffer: new ResizeableBuffer(100), + record: [], + recordHasError: false, + record_length: 0, + recordDelimiterMaxLength: + options.record_delimiter.length === 0 + ? 0 + : Math.max(...options.record_delimiter.map((v) => v.length)), + trimChars: [ + Buffer.from(" ", options.encoding)[0], + Buffer.from("\t", options.encoding)[0], + ], + wasQuoting: false, + wasRowDelimiter: false, + timchars: timchars, + timcharFirstBytes: timcharFirstBytes, + }; +}; + +const underscore = function (str) { + return str.replace(/([A-Z])/g, function (_, match) { + return "_" + match.toLowerCase(); + }); +}; + +const normalize_options = function (opts) { + const options = {}; + // Merge with user options + for (const opt in opts) { + options[underscore(opt)] = opts[opt]; + } + // Normalize option `encoding` + // Note: defined first because other options depends on it + // to convert chars/strings into buffers. + if (options.encoding === undefined || options.encoding === true) { + options.encoding = "utf8"; + } else if (options.encoding === null || options.encoding === false) { + options.encoding = null; + } else if ( + typeof options.encoding !== "string" && + options.encoding !== null + ) { + throw new CsvError( + "CSV_INVALID_OPTION_ENCODING", + [ + "Invalid option encoding:", + "encoding must be a string or null to return a buffer,", + `got ${JSON.stringify(options.encoding)}`, + ], + options, + ); + } + // Normalize option `bom` + if ( + options.bom === undefined || + options.bom === null || + options.bom === false + ) { + options.bom = false; + } else if (options.bom !== true) { + throw new CsvError( + "CSV_INVALID_OPTION_BOM", + [ + "Invalid option bom:", + "bom must be true,", + `got ${JSON.stringify(options.bom)}`, + ], + options, + ); + } + // Normalize option `cast` + options.cast_function = null; + if ( + options.cast === undefined || + options.cast === null || + options.cast === false || + options.cast === "" + ) { + options.cast = undefined; + } else if (typeof options.cast === "function") { + options.cast_function = options.cast; + options.cast = true; + } else if (options.cast !== true) { + throw new CsvError( + "CSV_INVALID_OPTION_CAST", + [ + "Invalid option cast:", + "cast must be true or a function,", + `got ${JSON.stringify(options.cast)}`, + ], + options, + ); + } + // Normalize option `cast_date` + if ( + options.cast_date === undefined || + options.cast_date === null || + options.cast_date === false || + options.cast_date === "" + ) { + options.cast_date = false; + } else if (options.cast_date === true) { + options.cast_date = function (value) { + const date = Date.parse(value); + return !isNaN(date) ? new Date(date) : value; + }; + } else if (typeof options.cast_date !== "function") { + throw new CsvError( + "CSV_INVALID_OPTION_CAST_DATE", + [ + "Invalid option cast_date:", + "cast_date must be true or a function,", + `got ${JSON.stringify(options.cast_date)}`, + ], + options, + ); + } + // Normalize option `columns` + options.cast_first_line_to_header = undefined; + if (options.columns === true) { + // Fields in the first line are converted as-is to columns + options.cast_first_line_to_header = undefined; + } else if (typeof options.columns === "function") { + options.cast_first_line_to_header = options.columns; + options.columns = true; + } else if (Array.isArray(options.columns)) { + options.columns = normalize_columns_array(options.columns); + } else if ( + options.columns === undefined || + options.columns === null || + options.columns === false + ) { + options.columns = false; + } else { + throw new CsvError( + "CSV_INVALID_OPTION_COLUMNS", + [ + "Invalid option columns:", + "expect an array, a function or true,", + `got ${JSON.stringify(options.columns)}`, + ], + options, + ); + } + // Normalize option `group_columns_by_name` + if ( + options.group_columns_by_name === undefined || + options.group_columns_by_name === null || + options.group_columns_by_name === false + ) { + options.group_columns_by_name = false; + } else if (options.group_columns_by_name !== true) { + throw new CsvError( + "CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME", + [ + "Invalid option group_columns_by_name:", + "expect an boolean,", + `got ${JSON.stringify(options.group_columns_by_name)}`, + ], + options, + ); + } else if (options.columns === false) { + throw new CsvError( + "CSV_INVALID_OPTION_GROUP_COLUMNS_BY_NAME", + [ + "Invalid option group_columns_by_name:", + "the `columns` mode must be activated.", + ], + options, + ); + } + // Normalize option `comment` + if ( + options.comment === undefined || + options.comment === null || + options.comment === false || + options.comment === "" + ) { + options.comment = null; + } else { + if (typeof options.comment === "string") { + options.comment = Buffer.from(options.comment, options.encoding); + } + if (!Buffer.isBuffer(options.comment)) { + throw new CsvError( + "CSV_INVALID_OPTION_COMMENT", + [ + "Invalid option comment:", + "comment must be a buffer or a string,", + `got ${JSON.stringify(options.comment)}`, + ], + options, + ); + } + } + // Normalize option `comment_no_infix` + if ( + options.comment_no_infix === undefined || + options.comment_no_infix === null || + options.comment_no_infix === false + ) { + options.comment_no_infix = false; + } else if (options.comment_no_infix !== true) { + throw new CsvError( + "CSV_INVALID_OPTION_COMMENT", + [ + "Invalid option comment_no_infix:", + "value must be a boolean,", + `got ${JSON.stringify(options.comment_no_infix)}`, + ], + options, + ); + } + // Normalize option `delimiter_auto` + if ( + options.delimiter_auto === undefined || + options.delimiter_auto === null || + options.delimiter_auto === false + ) { + options.delimiter_auto = false; + } else if (options.delimiter_auto === true) { + options.delimiter_auto = {}; + } else if (!is_object(options.delimiter_auto)) { + throw new CsvError( + "CSV_INVALID_OPTION_DELIMITER_AUTO", + [ + "Invalid option delimiter_auto:", + "delimiter_auto must be a boolean or a configuration object,", + `got ${JSON.stringify(options.delimiter_auto)}`, + ], + options, + ); + } + if (options.delimiter_auto) { + if (options.delimiter_auto.preferred === undefined) + options.delimiter_auto.preferred = { + [",".charCodeAt(0)]: 1.8, + ["\t".charCodeAt(0)]: 1.8, + [";".charCodeAt(0)]: 1.6, + [" ".charCodeAt(0)]: 1.6, + [":".charCodeAt(0)]: 1.5, + [".".charCodeAt(0)]: 1.4, + ["/".charCodeAt(0)]: 1.4, + }; + else if (!is_object(options.delimiter_auto.preferred)) { + throw new CsvError( + "CSV_INVALID_OPTION_DELIMITER_AUTO", + [ + "Invalid option delimiter_auto:", + "preferred must be an object,", + `got ${JSON.stringify(options.delimiter_auto.preferred)}`, + ], + options, + ); + } + if (options.delimiter_auto.score === undefined) + options.delimiter_auto.score = (info, options) => { + return ( + (info.total - info.std) * (options.preferred[info.char_code] || 1) + ); + }; + else if (typeof options.delimiter_auto.score !== "function") { + throw new CsvError( + "CSV_INVALID_OPTION_DELIMITER_AUTO", + [ + "Invalid option delimiter_auto:", + "score must be a function,", + `got ${JSON.stringify(options.delimiter_auto.score)}`, + ], + options, + ); + } + if (options.delimiter_auto.size === undefined) + options.delimiter_auto.size = 2048; + else if (typeof options.delimiter_auto.size !== "number") { + throw new CsvError( + "CSV_INVALID_OPTION_DELIMITER_AUTO", + [ + "Invalid option delimiter_auto:", + "size must be a number,", + `got ${JSON.stringify(options.delimiter_auto.size)}`, + ], + options, + ); + } + } + // Normalize option `delimiter` + const delimiter_json = JSON.stringify(options.delimiter); + if (options.delimiter_auto !== false) { + options.delimiter = []; + } + if (!Array.isArray(options.delimiter)) { + if ( + options.delimiter === undefined || + options.delimiter === null || + options.delimiter === false + ) { + options.delimiter = Buffer.from(",", options.encoding); + } + options.delimiter = [options.delimiter]; + } + options.delimiter = options.delimiter.map(function (delimiter) { + if (typeof delimiter === "string") { + delimiter = Buffer.from(delimiter, options.encoding); + } + if (!Buffer.isBuffer(delimiter) || delimiter.length === 0) { + throw new CsvError( + "CSV_INVALID_OPTION_DELIMITER", + [ + "Invalid option delimiter:", + "delimiter must be a non empty string or buffer or array of string|buffer,", + `got ${delimiter_json}`, + ], + options, + ); + } + return delimiter; + }); + // Normalize option `escape` + if (options.escape === undefined || options.escape === true) { + options.escape = Buffer.from('"', options.encoding); + } else if (typeof options.escape === "string") { + options.escape = Buffer.from(options.escape, options.encoding); + } else if (options.escape === null || options.escape === false) { + options.escape = null; + } + if (options.escape !== null) { + if (!Buffer.isBuffer(options.escape)) { + throw new Error( + `Invalid Option: escape must be a buffer, a string or a boolean, got ${JSON.stringify(options.escape)}`, + ); + } + } + // Normalize option `from` + if (options.from === undefined || options.from === null) { + options.from = 1; + } else { + if (typeof options.from === "string" && /\d+/.test(options.from)) { + options.from = parseInt(options.from); + } + if (Number.isInteger(options.from)) { + if (options.from < 0) { + throw new Error( + `Invalid Option: from must be a positive integer, got ${JSON.stringify(opts.from)}`, + ); + } + } else { + throw new Error( + `Invalid Option: from must be an integer, got ${JSON.stringify(options.from)}`, + ); + } + } + // Normalize option `from_line` + if (options.from_line === undefined || options.from_line === null) { + options.from_line = 1; + } else { + if ( + typeof options.from_line === "string" && + /\d+/.test(options.from_line) + ) { + options.from_line = parseInt(options.from_line); + } + if (Number.isInteger(options.from_line)) { + if (options.from_line <= 0) { + throw new Error( + `Invalid Option: from_line must be a positive integer greater than 0, got ${JSON.stringify(opts.from_line)}`, + ); + } + } else { + throw new Error( + `Invalid Option: from_line must be an integer, got ${JSON.stringify(opts.from_line)}`, + ); + } + } + // Normalize options `ignore_last_delimiters` + if ( + options.ignore_last_delimiters === undefined || + options.ignore_last_delimiters === null + ) { + options.ignore_last_delimiters = false; + } else if (typeof options.ignore_last_delimiters === "number") { + options.ignore_last_delimiters = Math.floor(options.ignore_last_delimiters); + if (options.ignore_last_delimiters === 0) { + options.ignore_last_delimiters = false; + } + } else if (typeof options.ignore_last_delimiters !== "boolean") { + throw new CsvError( + "CSV_INVALID_OPTION_IGNORE_LAST_DELIMITERS", + [ + "Invalid option `ignore_last_delimiters`:", + "the value must be a boolean value or an integer,", + `got ${JSON.stringify(options.ignore_last_delimiters)}`, + ], + options, + ); + } + if (options.ignore_last_delimiters === true && options.columns === false) { + throw new CsvError( + "CSV_IGNORE_LAST_DELIMITERS_REQUIRES_COLUMNS", + [ + "The option `ignore_last_delimiters`", + "requires the activation of the `columns` option", + ], + options, + ); + } + // Normalize option `info` + if ( + options.info === undefined || + options.info === null || + options.info === false + ) { + options.info = false; + } else if (options.info !== true) { + throw new Error( + `Invalid Option: info must be true, got ${JSON.stringify(options.info)}`, + ); + } + // Normalize option `max_record_size` + if ( + options.max_record_size === undefined || + options.max_record_size === null || + options.max_record_size === false + ) { + options.max_record_size = 0; + } else if ( + Number.isInteger(options.max_record_size) && + options.max_record_size >= 0 + ) ; else if ( + typeof options.max_record_size === "string" && + /\d+/.test(options.max_record_size) + ) { + options.max_record_size = parseInt(options.max_record_size); + } else { + throw new Error( + `Invalid Option: max_record_size must be a positive integer, got ${JSON.stringify(options.max_record_size)}`, + ); + } + // Normalize option `objname` + if ( + options.objname === undefined || + options.objname === null || + options.objname === false + ) { + options.objname = undefined; + } else if (Buffer.isBuffer(options.objname)) { + if (options.objname.length === 0) { + throw new Error(`Invalid Option: objname must be a non empty buffer`); + } + if (options.encoding === null) ; else { + options.objname = options.objname.toString(options.encoding); + } + } else if (typeof options.objname === "string") { + if (options.objname.length === 0) { + throw new Error(`Invalid Option: objname must be a non empty string`); + } + // Great, nothing to do + } else if (typeof options.objname === "number") ; else { + throw new Error( + `Invalid Option: objname must be a string or a buffer, got ${options.objname}`, + ); + } + if (options.objname !== undefined) { + if (typeof options.objname === "number") { + if (options.columns !== false) { + throw Error( + "Invalid Option: objname index cannot be combined with columns or be defined as a field", + ); + } + } else { + // A string or a buffer + if (options.columns === false) { + throw Error( + "Invalid Option: objname field must be combined with columns or be defined as an index", + ); + } + } + } + // Normalize option `on_record` + if (options.on_record === undefined || options.on_record === null) { + options.on_record = undefined; + } else if (typeof options.on_record !== "function") { + throw new CsvError( + "CSV_INVALID_OPTION_ON_RECORD", + [ + "Invalid option `on_record`:", + "expect a function,", + `got ${JSON.stringify(options.on_record)}`, + ], + options, + ); + } + // Normalize option `on_skip` + // options.on_skip ??= (err, chunk) => { + // this.emit('skip', err, chunk); + // }; + if ( + options.on_skip !== undefined && + options.on_skip !== null && + typeof options.on_skip !== "function" + ) { + throw new Error( + `Invalid Option: on_skip must be a function, got ${JSON.stringify(options.on_skip)}`, + ); + } + // Normalize option `quote` + if ( + options.quote === null || + options.quote === false || + options.quote === "" + ) { + options.quote = null; + } else { + if (options.quote === undefined || options.quote === true) { + options.quote = Buffer.from('"', options.encoding); + } else if (typeof options.quote === "string") { + options.quote = Buffer.from(options.quote, options.encoding); + } + if (!Buffer.isBuffer(options.quote)) { + throw new Error( + `Invalid Option: quote must be a buffer or a string, got ${JSON.stringify(options.quote)}`, + ); + } + } + // Normalize option `raw` + if ( + options.raw === undefined || + options.raw === null || + options.raw === false + ) { + options.raw = false; + } else if (options.raw !== true) { + throw new Error( + `Invalid Option: raw must be true, got ${JSON.stringify(options.raw)}`, + ); + } + // Normalize option `record_delimiter` + if (options.record_delimiter === undefined) { + options.record_delimiter = []; + } else if ( + typeof options.record_delimiter === "string" || + Buffer.isBuffer(options.record_delimiter) + ) { + if (options.record_delimiter.length === 0) { + throw new CsvError( + "CSV_INVALID_OPTION_RECORD_DELIMITER", + [ + "Invalid option `record_delimiter`:", + "value must be a non empty string or buffer,", + `got ${JSON.stringify(options.record_delimiter)}`, + ], + options, + ); + } + options.record_delimiter = [options.record_delimiter]; + } else if (!Array.isArray(options.record_delimiter)) { + throw new CsvError( + "CSV_INVALID_OPTION_RECORD_DELIMITER", + [ + "Invalid option `record_delimiter`:", + "value must be a string, a buffer or array of string|buffer,", + `got ${JSON.stringify(options.record_delimiter)}`, + ], + options, + ); + } + options.record_delimiter = options.record_delimiter.map(function (rd, i) { + if (typeof rd !== "string" && !Buffer.isBuffer(rd)) { + throw new CsvError( + "CSV_INVALID_OPTION_RECORD_DELIMITER", + [ + "Invalid option `record_delimiter`:", + "value must be a string, a buffer or array of string|buffer", + `at index ${i},`, + `got ${JSON.stringify(rd)}`, + ], + options, + ); + } else if (rd.length === 0) { + throw new CsvError( + "CSV_INVALID_OPTION_RECORD_DELIMITER", + [ + "Invalid option `record_delimiter`:", + "value must be a non empty string or buffer", + `at index ${i},`, + `got ${JSON.stringify(rd)}`, + ], + options, + ); + } + if (typeof rd === "string") { + rd = Buffer.from(rd, options.encoding); + } + return rd; + }); + // Normalize option `relax_column_count` + if (typeof options.relax_column_count === "boolean") ; else if ( + options.relax_column_count === undefined || + options.relax_column_count === null + ) { + options.relax_column_count = false; + } else { + throw new Error( + `Invalid Option: relax_column_count must be a boolean, got ${JSON.stringify(options.relax_column_count)}`, + ); + } + if (typeof options.relax_column_count_less === "boolean") ; else if ( + options.relax_column_count_less === undefined || + options.relax_column_count_less === null + ) { + options.relax_column_count_less = false; + } else { + throw new Error( + `Invalid Option: relax_column_count_less must be a boolean, got ${JSON.stringify(options.relax_column_count_less)}`, + ); + } + if (typeof options.relax_column_count_more === "boolean") ; else if ( + options.relax_column_count_more === undefined || + options.relax_column_count_more === null + ) { + options.relax_column_count_more = false; + } else { + throw new Error( + `Invalid Option: relax_column_count_more must be a boolean, got ${JSON.stringify(options.relax_column_count_more)}`, + ); + } + // Normalize option `relax_quotes` + if (typeof options.relax_quotes === "boolean") ; else if ( + options.relax_quotes === undefined || + options.relax_quotes === null + ) { + options.relax_quotes = false; + } else { + throw new Error( + `Invalid Option: relax_quotes must be a boolean, got ${JSON.stringify(options.relax_quotes)}`, + ); + } + // Normalize option `skip_empty_lines` + if (typeof options.skip_empty_lines === "boolean") ; else if ( + options.skip_empty_lines === undefined || + options.skip_empty_lines === null + ) { + options.skip_empty_lines = false; + } else { + throw new Error( + `Invalid Option: skip_empty_lines must be a boolean, got ${JSON.stringify(options.skip_empty_lines)}`, + ); + } + // Normalize option `skip_records_with_empty_values` + if (typeof options.skip_records_with_empty_values === "boolean") ; else if ( + options.skip_records_with_empty_values === undefined || + options.skip_records_with_empty_values === null + ) { + options.skip_records_with_empty_values = false; + } else { + throw new Error( + `Invalid Option: skip_records_with_empty_values must be a boolean, got ${JSON.stringify(options.skip_records_with_empty_values)}`, + ); + } + // Normalize option `skip_records_with_error` + if (typeof options.skip_records_with_error === "boolean") ; else if ( + options.skip_records_with_error === undefined || + options.skip_records_with_error === null + ) { + options.skip_records_with_error = false; + } else { + throw new Error( + `Invalid Option: skip_records_with_error must be a boolean, got ${JSON.stringify(options.skip_records_with_error)}`, + ); + } + // Normalize option `rtrim` + if ( + options.rtrim === undefined || + options.rtrim === null || + options.rtrim === false + ) { + options.rtrim = false; + } else if (options.rtrim !== true) { + throw new Error( + `Invalid Option: rtrim must be a boolean, got ${JSON.stringify(options.rtrim)}`, + ); + } + // Normalize option `ltrim` + if ( + options.ltrim === undefined || + options.ltrim === null || + options.ltrim === false + ) { + options.ltrim = false; + } else if (options.ltrim !== true) { + throw new Error( + `Invalid Option: ltrim must be a boolean, got ${JSON.stringify(options.ltrim)}`, + ); + } + // Normalize option `trim` + if ( + options.trim === undefined || + options.trim === null || + options.trim === false + ) { + options.trim = false; + } else if (options.trim !== true) { + throw new Error( + `Invalid Option: trim must be a boolean, got ${JSON.stringify(options.trim)}`, + ); + } + // Normalize options `trim`, `ltrim` and `rtrim` + if (options.trim === true && opts.ltrim !== false) { + options.ltrim = true; + } else if (options.ltrim !== true) { + options.ltrim = false; + } + if (options.trim === true && opts.rtrim !== false) { + options.rtrim = true; + } else if (options.rtrim !== true) { + options.rtrim = false; + } + // Normalize option `to` + if (options.to === undefined || options.to === null) { + options.to = -1; + } else if (options.to !== -1) { + if (typeof options.to === "string" && /\d+/.test(options.to)) { + options.to = parseInt(options.to); + } + if (Number.isInteger(options.to)) { + if (options.to <= 0) { + throw new Error( + `Invalid Option: to must be a positive integer greater than 0, got ${JSON.stringify(opts.to)}`, + ); + } + } else { + throw new Error( + `Invalid Option: to must be an integer, got ${JSON.stringify(opts.to)}`, + ); + } + } + // Normalize option `to_line` + if (options.to_line === undefined || options.to_line === null) { + options.to_line = -1; + } else if (options.to_line !== -1) { + if (typeof options.to_line === "string" && /\d+/.test(options.to_line)) { + options.to_line = parseInt(options.to_line); + } + if (Number.isInteger(options.to_line)) { + if (options.to_line <= 0) { + throw new Error( + `Invalid Option: to_line must be a positive integer greater than 0, got ${JSON.stringify(opts.to_line)}`, + ); + } + } else { + throw new Error( + `Invalid Option: to_line must be an integer, got ${JSON.stringify(opts.to_line)}`, + ); + } + } + return options; +}; + +// Discussed in [issue #400](https://github.com/adaltas/node-csv/issues/400) +// See https://github.com/python/cpython/blob/ea1b1c579f600cc85d145c60862b2e6b98701b24/Lib/csv.py#L349 +const delimiter_discover = function (records, options) { + // Normalize the configuration + if (!options) { + ({ delimiter_auto: options } = normalize_options({ delimiter_auto: true })); + } + // Convert String to Buffer + if (typeof records === "string") { + records = Buffer.from(records); + } + // Convert Buffer to an array of records + if (Buffer.isBuffer(records)) { + records = ((data) => { + const records = []; + const parser = transform({ delimiter: [] }); + const push = (record) => records.push(record); + const close = () => {}; + const error = parser.parse(data, true, push, close); + if (error !== undefined) throw error; + return records; + })(records); + } + // Info array initialization, 127 entries, one per char code + const info = Array(127) + .fill() + .map(() => ({ lines: [] })); + // Traverse each records, count occurences per char code + records.map(([record], line) => { + for (let i = 0, l = record.length; i < l; i++) { + // Count the character frequency + const code = record.charCodeAt(i); + if (info[code].lines[line] === undefined) info[code].lines[line] = 0; + info[code].lines[line]++; + } + }); + // Traverse each char code, compute the score + info.map((info, i) => { + info.char_code = i; + info.std = std(info.lines); + info.total = info.lines.reduce((acc, val) => acc + val, 0); + info.preferred = !!options.preferred[i]; + info.score = options.score(info, options); + }); + // Extract the dominant character + const result = info.reduce( + (acc, info) => (acc.score > info.score ? acc : info), + {}, + ); + return String.fromCharCode(result.char_code); +}; + +const std = function (array) { + const n = array.length; + if (n === 0) return 0; + const mean = array.reduce((a, b) => a + b) / n; + return Math.sqrt( + array.map((x) => Math.pow(x - mean, 2)).reduce((a, b) => a + b) / n, + ); +}; + +const isRecordEmpty = function (record) { + return record.every( + (field) => + field == null || (field.toString && field.toString().trim() === ""), + ); +}; + +const cr = 13; // `\r`, carriage return, 0x0D in hexadécimal, 13 in decimal +const nl = 10; // `\n`, newline, 0x0A in hexadecimal, 10 in decimal + +const boms = { + // Note, the following are equals: + // Buffer.from("\ufeff") + // Buffer.from([239, 187, 191]) + // Buffer.from('EFBBBF', 'hex') + utf8: Buffer.from([239, 187, 191]), + // Note, the following are equals: + // Buffer.from "\ufeff", 'utf16le + // Buffer.from([255, 254]) + utf16le: Buffer.from([255, 254]), +}; + +const transform = function (original_options = {}) { + const info = { + bytes: 0, + bytes_records: 0, + comment_lines: 0, + empty_lines: 0, + invalid_field_length: 0, + lines: 1, + records: 0, + }; + const options = normalize_options(original_options); + return { + info: info, + original_options: original_options, + options: options, + state: init_state(options), + __needMoreData: function (i, bufLen, end) { + if (end) return false; + const { encoding, escape, quote } = this.options; + const { quoting, needMoreDataSize, recordDelimiterMaxLength } = + this.state; + const numOfCharLeft = bufLen - i - 1; + const requiredLength = Math.max( + needMoreDataSize, + // Skip if the remaining buffer smaller than record delimiter + // If "record_delimiter" is yet to be discovered: + // 1. It is equals to `[]` and "recordDelimiterMaxLength" equals `0` + // 2. We set the length to windows line ending in the current encoding + // Note, that encoding is known from user or bom discovery at that point + // recordDelimiterMaxLength, + recordDelimiterMaxLength === 0 + ? Buffer.from("\r\n", encoding).length + : recordDelimiterMaxLength, + // Skip if remaining buffer can be an escaped quote + quoting ? (escape === null ? 0 : escape.length) + quote.length : 0, + // Skip if remaining buffer can be record delimiter following the closing quote + quoting ? quote.length + recordDelimiterMaxLength : 0, + ); + return numOfCharLeft < requiredLength; + }, + // Central parser implementation + parse: function (nextBuf, end, push, close) { + const { + bom, + comment_no_infix, + delimiter_auto, + encoding, + from_line, + ltrim, + max_record_size, + raw, + relax_quotes, + rtrim, + skip_empty_lines, + to, + to_line, + } = this.options; + let { comment, escape, quote, record_delimiter } = this.options; + const { + bomSkipped, + delimiterDiscovered, + delimiterBufPrevious, + rawBuffer, + escapeIsQuote, + } = this.state; + // Automatic delimiter discovery + if (!delimiterDiscovered && delimiter_auto) { + let delimiterBuf; + if (delimiterBufPrevious === undefined) { + delimiterBuf = nextBuf; + } else if ( + delimiterBufPrevious !== undefined && + nextBuf === undefined + ) { + delimiterBuf = delimiterBufPrevious; + } else { + delimiterBuf = Buffer.concat([delimiterBufPrevious, nextBuf]); + } + // Ensure that nextBuf is not concatenated a second time during buffer reconciliation + nextBuf = undefined; + // this.delimiterBufPrevious = delimiterBuf; + if (end || delimiterBuf.length > delimiter_auto.size) { + this.options.delimiter = [ + Buffer.from( + delimiter_discover(delimiterBuf, this.options.delimiter_auto), + ), + ]; + this.state.previousBuf = delimiterBuf; + this.state.delimiterBufPrevious = undefined; + this.state.delimiterDiscovered = true; + } else { + this.state.delimiterBufPrevious = delimiterBuf; + return; + } + } + // Previous buffers reconciliation + const { previousBuf } = this.state; + let buf; + if (previousBuf === undefined) { + if (nextBuf === undefined) { + // Handle empty string + close(); + return; + } else { + buf = nextBuf; + } + } else if (previousBuf !== undefined && nextBuf === undefined) { + buf = previousBuf; + } else { + buf = Buffer.concat([previousBuf, nextBuf]); + } + // Handle UTF BOM + if (bomSkipped === false) { + if (bom === false) { + this.state.bomSkipped = true; + } else if (buf.length < 3) { + // No enough data + if (end === false) { + // Wait for more data + this.state.previousBuf = buf; + return; + } + } else { + for (const encoding in boms) { + if (boms[encoding].compare(buf, 0, boms[encoding].length) === 0) { + // Skip BOM + const bomLength = boms[encoding].length; + this.state.bufBytesStart += bomLength; + buf = buf.slice(bomLength); + // Renormalize original options with the new encoding + const options = normalize_options({ + ...this.original_options, + encoding: encoding, + }); + // Properties are merged with the existing options instance + for (const key in options) { + this.options[key] = options[key]; + } + // Options will re-evaluate the Buffer with the new encoding + ({ comment, escape, quote } = this.options); + break; + } + } + this.state.bomSkipped = true; + } + } + const bufLen = buf.length; + let pos; + for (pos = 0; pos < bufLen; pos++) { + // Ensure we get enough space to look ahead + // There should be a way to move this out of the loop + if (this.__needMoreData(pos, bufLen, end)) { + break; + } + if (this.state.wasRowDelimiter === true) { + this.info.lines++; + this.state.wasRowDelimiter = false; + } + if (to_line !== -1 && this.info.lines > to_line) { + this.state.stop = true; + close(); + return; + } + // Auto discovery of record_delimiter, unix, mac and windows supported + if (this.state.quoting === false && record_delimiter.length === 0) { + const record_delimiterCount = this.__autoDiscoverRecordDelimiter( + buf, + pos, + ); + if (record_delimiterCount) { + record_delimiter = this.options.record_delimiter; + } + } + const chr = buf[pos]; + if (raw === true) { + rawBuffer.append(chr); + } + if ( + (chr === cr || chr === nl) && + this.state.wasRowDelimiter === false + ) { + this.state.wasRowDelimiter = true; + } + // Previous char was a valid escape char + // treat the current char as a regular char + if (this.state.escaping === true) { + this.state.escaping = false; + } else { + // Escape is only active inside quoted fields + // We are quoting, the char is an escape chr and there is a chr to escape + // if(escape !== null && this.state.quoting === true && chr === escape && pos + 1 < bufLen){ + if ( + escape !== null && + this.state.quoting === true && + this.__isEscape(buf, pos, chr) && + pos + escape.length < bufLen + ) { + if (escapeIsQuote) { + if (this.__isQuote(buf, pos + escape.length)) { + this.state.escaping = true; + pos += escape.length - 1; + continue; + } + } else { + this.state.escaping = true; + pos += escape.length - 1; + continue; + } + } + // Not currently escaping and chr is a quote + // TODO: need to compare bytes instead of single char + if (this.state.commenting === false && this.__isQuote(buf, pos)) { + if (this.state.quoting === true) { + const nextChr = buf[pos + quote.length]; + const isNextChrTrimable = + rtrim && this.__isCharTrimable(buf, pos + quote.length); + const isNextChrComment = + comment !== null && + this.__compareBytes(comment, buf, pos + quote.length, nextChr); + const isNextChrDelimiter = this.__isDelimiter( + buf, + pos + quote.length, + nextChr, + ); + const isNextChrRecordDelimiter = + record_delimiter.length === 0 + ? this.__autoDiscoverRecordDelimiter(buf, pos + quote.length) + : this.__isRecordDelimiter(nextChr, buf, pos + quote.length); + // Escape a quote + // Treat next char as a regular character + if ( + escape !== null && + this.__isEscape(buf, pos, chr) && + this.__isQuote(buf, pos + escape.length) + ) { + pos += escape.length - 1; + } else if ( + !nextChr || + isNextChrDelimiter || + isNextChrRecordDelimiter || + isNextChrComment || + isNextChrTrimable + ) { + this.state.quoting = false; + this.state.wasQuoting = true; + pos += quote.length - 1; + continue; + } else if (relax_quotes === false) { + const err = this.__error( + new CsvError( + "CSV_INVALID_CLOSING_QUOTE", + [ + "Invalid Closing Quote:", + `got "${String.fromCharCode(nextChr)}"`, + `at line ${this.info.lines}`, + "instead of delimiter, record delimiter, trimable character", + "(if activated) or comment", + ], + this.options, + this.__infoField(), + ), + ); + if (err !== undefined) return err; + } else { + this.state.quoting = false; + this.state.wasQuoting = true; + this.state.field.prepend(quote); + pos += quote.length - 1; + } + } else { + if (this.state.field.length !== 0) { + // In relax_quotes mode, treat opening quote preceded by chrs as regular + if (relax_quotes === false) { + const info = this.__infoField(); + const bom = Object.keys(boms) + .map((b) => + boms[b].equals(this.state.field.toString()) ? b : false, + ) + .filter(Boolean)[0]; + const err = this.__error( + new CsvError( + "INVALID_OPENING_QUOTE", + [ + "Invalid Opening Quote:", + `a quote is found on field ${JSON.stringify(info.column)} at line ${info.lines}, value is ${JSON.stringify(this.state.field.toString(encoding))}`, + bom ? `(${bom} bom)` : undefined, + ], + this.options, + info, + { + field: this.state.field, + }, + ), + ); + if (err !== undefined) return err; + } + } else { + this.state.quoting = true; + pos += quote.length - 1; + continue; + } + } + } + if (this.state.quoting === false) { + const recordDelimiterLength = this.__isRecordDelimiter( + chr, + buf, + pos, + ); + if (recordDelimiterLength !== 0) { + // Do not emit comments which take a full line + const skipCommentLine = + this.state.commenting && + this.state.wasQuoting === false && + this.state.record.length === 0 && + this.state.field.length === 0; + if (skipCommentLine) { + this.info.comment_lines++; + // Skip full comment line + } else { + // Activate records emission if above from_line + if ( + this.state.enabled === false && + this.info.lines + + (this.state.wasRowDelimiter === true ? 1 : 0) >= + from_line + ) { + this.state.enabled = true; + this.__resetField(); + this.__resetRecord(); + pos += recordDelimiterLength - 1; + continue; + } + // Skip if line is empty and skip_empty_lines activated + if ( + skip_empty_lines === true && + this.state.wasQuoting === false && + this.state.record.length === 0 && + this.state.field.length === 0 + ) { + this.info.empty_lines++; + pos += recordDelimiterLength - 1; + continue; + } + this.info.bytes = this.state.bufBytesStart + pos; + const errField = this.__onField(); + if (errField !== undefined) return errField; + this.info.bytes = + this.state.bufBytesStart + pos + recordDelimiterLength; + const errRecord = this.__onRecord(push); + if (errRecord !== undefined) return errRecord; + if (to !== -1 && this.info.records >= to) { + this.state.stop = true; + close(); + return; + } + } + this.state.commenting = false; + pos += recordDelimiterLength - 1; + continue; + } + if (this.state.commenting) { + continue; + } + if ( + comment !== null && + (comment_no_infix === false || + (this.state.record.length === 0 && + this.state.field.length === 0)) + ) { + const commentCount = this.__compareBytes(comment, buf, pos, chr); + if (commentCount !== 0) { + this.state.commenting = true; + continue; + } + } + const delimiterLength = this.__isDelimiter(buf, pos, chr); + if (delimiterLength !== 0) { + this.info.bytes = this.state.bufBytesStart + pos; + const errField = this.__onField(); + if (errField !== undefined) return errField; + pos += delimiterLength - 1; + continue; + } + } + } + if (this.state.commenting === false) { + if ( + max_record_size !== 0 && + this.state.record_length + this.state.field.length > max_record_size + ) { + return this.__error( + new CsvError( + "CSV_MAX_RECORD_SIZE", + [ + "Max Record Size:", + "record exceed the maximum number of tolerated bytes", + `of ${max_record_size}`, + `at line ${this.info.lines}`, + ], + this.options, + this.__infoField(), + ), + ); + } + } + const lappend = + ltrim === false || + this.state.quoting === true || + this.state.field.length !== 0 || + !this.__isCharTrimable(buf, pos); + // rtrim in non quoting is handle in __onField + const rappend = rtrim === false || this.state.wasQuoting === false; + if (lappend === true && rappend === true) { + this.state.field.append(chr); + } else if (rtrim === true && !this.__isCharTrimable(buf, pos)) { + return this.__error( + new CsvError( + "CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE", + [ + "Invalid Closing Quote:", + "found non trimable byte after quote", + `at line ${this.info.lines}`, + ], + this.options, + this.__infoField(), + ), + ); + } else { + if (lappend === false) { + pos += this.__isCharTrimable(buf, pos) - 1; + } + continue; + } + } + if (end === true) { + // Ensure we are not ending in a quoting state + if (this.state.quoting === true) { + const err = this.__error( + new CsvError( + "CSV_QUOTE_NOT_CLOSED", + [ + "Quote Not Closed:", + `the parsing is finished with an opening quote at line ${this.info.lines}`, + ], + this.options, + this.__infoField(), + ), + ); + if (err !== undefined) return err; + } else { + // Skip last line if it has no characters + if ( + this.state.wasQuoting === true || + this.state.record.length !== 0 || + this.state.field.length !== 0 + ) { + this.info.bytes = this.state.bufBytesStart + pos; + const errField = this.__onField(); + if (errField !== undefined) return errField; + const errRecord = this.__onRecord(push); + if (errRecord !== undefined) return errRecord; + } else if (this.state.wasRowDelimiter === true) { + this.info.empty_lines++; + } else if (this.state.commenting === true) { + this.info.comment_lines++; + } + } + } else { + this.state.bufBytesStart += pos; + this.state.previousBuf = buf.slice(pos); + } + if (this.state.wasRowDelimiter === true) { + this.info.lines++; + this.state.wasRowDelimiter = false; + } + }, + __onRecord: function (push) { + const { + columns, + group_columns_by_name, + encoding, + info, + from, + relax_column_count, + relax_column_count_less, + relax_column_count_more, + raw, + skip_records_with_empty_values, + } = this.options; + const { enabled, record } = this.state; + if (enabled === false) { + return this.__resetRecord(); + } + // Convert the first line into column names + const recordLength = record.length; + if (columns === true) { + if (skip_records_with_empty_values === true && isRecordEmpty(record)) { + this.__resetRecord(); + return; + } + return this.__firstLineToColumns(record); + } + if (columns === false && this.info.records === 0) { + this.state.expectedRecordLength = recordLength; + } + if (recordLength !== this.state.expectedRecordLength) { + const err = + columns === false + ? new CsvError( + "CSV_RECORD_INCONSISTENT_FIELDS_LENGTH", + [ + "Invalid Record Length:", + `expect ${this.state.expectedRecordLength},`, + `got ${recordLength} on line ${this.info.lines}`, + ], + this.options, + this.__infoField(), + { + record: record, + }, + ) + : new CsvError( + "CSV_RECORD_INCONSISTENT_COLUMNS", + [ + "Invalid Record Length:", + `columns length is ${columns.length},`, // rename columns + `got ${recordLength} on line ${this.info.lines}`, + ], + this.options, + this.__infoField(), + { + record: record, + }, + ); + if ( + relax_column_count === true || + (relax_column_count_less === true && + recordLength < this.state.expectedRecordLength) || + (relax_column_count_more === true && + recordLength > this.state.expectedRecordLength) + ) { + this.info.invalid_field_length++; + this.state.error = err; + // Error is undefined with skip_records_with_error + } else { + const finalErr = this.__error(err); + if (finalErr) return finalErr; + } + } + if (skip_records_with_empty_values === true && isRecordEmpty(record)) { + this.__resetRecord(); + return; + } + if (this.state.recordHasError === true) { + this.__resetRecord(); + this.state.recordHasError = false; + return; + } + this.info.records++; + if (from === 1 || this.info.records >= from) { + const { objname } = this.options; + // With columns, records are object + if (columns !== false) { + const obj = {}; + // Transform record array to an object + for (let i = 0, l = record.length; i < l; i++) { + if (columns[i] === undefined || columns[i].disabled) continue; + // Turn duplicate columns into an array + if ( + group_columns_by_name === true && + obj[columns[i].name] !== undefined + ) { + if (Array.isArray(obj[columns[i].name])) { + obj[columns[i].name] = obj[columns[i].name].concat(record[i]); + } else { + obj[columns[i].name] = [obj[columns[i].name], record[i]]; + } + } else { + obj[columns[i].name] = record[i]; + } + } + // Without objname (default) + if (raw === true || info === true) { + const extRecord = Object.assign( + { record: obj }, + raw === true + ? { raw: this.state.rawBuffer.toString(encoding) } + : {}, + info === true ? { info: this.__infoRecord() } : {}, + ); + const err = this.__push( + objname === undefined ? extRecord : [obj[objname], extRecord], + push, + ); + if (err) { + return err; + } + } else { + const err = this.__push( + objname === undefined ? obj : [obj[objname], obj], + push, + ); + if (err) { + return err; + } + } + // Without columns, records are array + } else { + if (raw === true || info === true) { + const extRecord = Object.assign( + { record: record }, + raw === true + ? { raw: this.state.rawBuffer.toString(encoding) } + : {}, + info === true ? { info: this.__infoRecord() } : {}, + ); + const err = this.__push( + objname === undefined ? extRecord : [record[objname], extRecord], + push, + ); + if (err) { + return err; + } + } else { + const err = this.__push( + objname === undefined ? record : [record[objname], record], + push, + ); + if (err) { + return err; + } + } + } + } + this.__resetRecord(); + }, + __firstLineToColumns: function (record) { + const { firstLineToHeaders } = this.state; + try { + const headers = + firstLineToHeaders === undefined + ? record + : firstLineToHeaders.call(null, record); + if (!Array.isArray(headers)) { + return this.__error( + new CsvError( + "CSV_INVALID_COLUMN_MAPPING", + [ + "Invalid Column Mapping:", + "expect an array from column function,", + `got ${JSON.stringify(headers)}`, + ], + this.options, + this.__infoField(), + { + headers: headers, + }, + ), + ); + } + const normalizedHeaders = normalize_columns_array(headers); + this.state.expectedRecordLength = normalizedHeaders.length; + this.options.columns = normalizedHeaders; + this.__resetRecord(); + return; + } catch (err) { + return err; + } + }, + __resetRecord: function () { + if (this.options.raw === true) { + this.state.rawBuffer.reset(); + } + this.state.error = undefined; + this.state.record = []; + this.state.record_length = 0; + }, + __onField: function () { + const { cast, encoding, rtrim, max_record_size } = this.options; + const { enabled, wasQuoting } = this.state; + // Short circuit for the from_line options + if (enabled === false) { + return this.__resetField(); + } + let field = this.state.field.toString(encoding); + if (rtrim === true && wasQuoting === false) { + field = field.trimRight(); + } + if (cast === true) { + const [err, f] = this.__cast(field); + if (err !== undefined) return err; + field = f; + } + this.state.record.push(field); + // Increment record length if record size must not exceed a limit + if (max_record_size !== 0 && typeof field === "string") { + this.state.record_length += field.length; + } + this.__resetField(); + }, + __resetField: function () { + this.state.field.reset(); + this.state.wasQuoting = false; + }, + __push: function (record, push) { + const { on_record } = this.options; + if (on_record !== undefined) { + const info = this.__infoRecord(); + try { + record = on_record.call(null, record, info); + } catch (err) { + return err; + } + if (record === undefined || record === null) { + return; + } + } + this.info.bytes_records += this.info.bytes; + push(record); + }, + // Return a tuple with the error and the casted value + __cast: function (field) { + const { columns, relax_column_count } = this.options; + const isColumns = Array.isArray(columns); + // Dont loose time calling cast + // because the final record is an object + // and this field can't be associated to a key present in columns + if ( + isColumns === true && + relax_column_count && + this.options.columns.length <= this.state.record.length + ) { + return [undefined, undefined]; + } + if (this.state.castField !== null) { + try { + const info = this.__infoField(); + return [undefined, this.state.castField.call(null, field, info)]; + } catch (err) { + return [err]; + } + } + if (this.__isFloat(field)) { + return [undefined, parseFloat(field)]; + } else if (this.options.cast_date !== false) { + const info = this.__infoField(); + return [undefined, this.options.cast_date.call(null, field, info)]; + } + return [undefined, field]; + }, + __compareBytes: function (sourceBuf, targetBuf, targetPos, firstByte) { + if (sourceBuf[0] !== firstByte) return 0; + const sourceLength = sourceBuf.length; + for (let i = 1; i < sourceLength; i++) { + if (sourceBuf[i] !== targetBuf[targetPos + i]) return 0; + } + return sourceLength; + }, + // Helper to test if a character is trimable + __isCharTrimable: function (buf, pos) { + const { timchars, timcharFirstBytes } = this.state; + // Fast bail-out: non-whitespace bytes (the common case) are rejected + // without scanning the full timchar list. + const first = buf[pos]; + if (first === undefined || timcharFirstBytes[first] === 0) return 0; + loop1: for (let i = 0; i < timchars.length; i++) { + const timchar = timchars[i]; + for (let j = 0; j < timchar.length; j++) { + if (timchar[j] !== buf[pos + j]) continue loop1; + } + return timchar.length; + } + return 0; + }, + __isDelimiter: function (buf, pos, chr) { + const { delimiter, ignore_last_delimiters } = this.options; + if ( + ignore_last_delimiters === true && + this.state.record.length === this.options.columns.length - 1 + ) { + return 0; + } else if ( + ignore_last_delimiters !== false && + typeof ignore_last_delimiters === "number" && + this.state.record.length === ignore_last_delimiters - 1 + ) { + return 0; + } + loop1: for (let i = 0; i < delimiter.length; i++) { + const del = delimiter[i]; + if (del[0] === chr) { + for (let j = 1; j < del.length; j++) { + if (del[j] !== buf[pos + j]) continue loop1; + } + return del.length; + } + } + return 0; + }, + __isEscape: function (buf, pos, chr) { + const { escape } = this.options; + if (escape === null) return false; + const l = escape.length; + if (escape[0] === chr) { + for (let i = 0; i < l; i++) { + if (escape[i] !== buf[pos + i]) { + return false; + } + } + return true; + } + return false; + }, + __isFloat: function (value) { + return value - parseFloat(value) + 1 >= 0; // Borrowed from jquery + }, + // Keep it in case we implement the `cast_int` option + // __isInt(value){ + // // return Number.isInteger(parseInt(value)) + // // return !isNaN( parseInt( obj ) ); + // return /^(\-|\+)?[1-9][0-9]*$/.test(value) + // } + __isQuote: function (buf, pos) { + const { quote } = this.options; + if (quote === null) return false; + const l = quote.length; + for (let i = 0; i < l; i++) { + if (quote[i] !== buf[pos + i]) { + return false; + } + } + return true; + }, + __isRecordDelimiter: function (chr, buf, pos) { + const { record_delimiter } = this.options; + const recordDelimiterLength = record_delimiter.length; + loop1: for (let i = 0; i < recordDelimiterLength; i++) { + const rd = record_delimiter[i]; + const rdLength = rd.length; + if (rd[0] !== chr) { + continue; + } + for (let j = 1; j < rdLength; j++) { + if (rd[j] !== buf[pos + j]) { + continue loop1; + } + } + return rd.length; + } + return 0; + }, + __autoDiscoverRecordDelimiter: function (buf, pos) { + const { encoding } = this.options; + // Note, we don't need to cache this information in state, + // It is only called on the first line until we find out a suitable + // record delimiter. + const rds = [ + // Important, the windows line ending must be before mac os 9 + Buffer.from("\r\n", encoding), + Buffer.from("\n", encoding), + Buffer.from("\r", encoding), + ]; + loop: for (let i = 0; i < rds.length; i++) { + const l = rds[i].length; + for (let j = 0; j < l; j++) { + if (rds[i][j] !== buf[pos + j]) { + continue loop; + } + } + this.options.record_delimiter.push(rds[i]); + this.state.recordDelimiterMaxLength = rds[i].length; + return rds[i].length; + } + return 0; + }, + __error: function (msg) { + const { encoding, raw, skip_records_with_error } = this.options; + const err = typeof msg === "string" ? new Error(msg) : msg; + if (skip_records_with_error) { + this.state.recordHasError = true; + if (this.options.on_skip !== undefined) { + try { + this.options.on_skip( + err, + raw ? this.state.rawBuffer.toString(encoding) : undefined, + ); + } catch (err) { + return err; + } + } + // this.emit('skip', err, raw ? this.state.rawBuffer.toString(encoding) : undefined); + return undefined; + } else { + return err; + } + }, + __infoDataSet: function () { + return { + ...this.info, + columns: this.options.columns, + }; + }, + __infoRecord: function () { + const { columns, raw, encoding } = this.options; + return { + ...this.__infoDataSet(), + bytes_records: this.info.bytes, + error: this.state.error, + header: columns === true, + index: this.state.record.length, + raw: raw ? this.state.rawBuffer.toString(encoding) : undefined, + }; + }, + __infoField: function () { + const { columns } = this.options; + const isColumns = Array.isArray(columns); + // Bytes records are only incremented when all records'fields are parsed + const bytes_records = this.info.bytes_records; + return { + ...this.__infoRecord(), + bytes_records: bytes_records, + column: + isColumns === true + ? columns.length > this.state.record.length + ? columns[this.state.record.length].name + : null + : this.state.record.length, + quoting: this.state.wasQuoting, + }; + }, + }; +}; + +const parse = (opts) => { + const api = transform(opts); + let controller; + const enqueue = (record) => { + controller.enqueue(record); + }; + const terminate = () => { + controller.terminate(); + }; + return new web.TransformStream( + { + start(ctr) { + controller = ctr; + }, + transform(chunk) { + const error = api.parse(chunk, false, enqueue, terminate); + if (error) { + controller.error(error); + } + }, + flush() { + const error = api.parse(undefined, true, enqueue, terminate); + if (error) { + controller.error(error); + } + }, + }, + new web.CountQueuingStrategy({ highWaterMark: 1024 }), + new web.CountQueuingStrategy({ highWaterMark: 1024 }), + ); +}; + +exports.CsvError = CsvError; +exports.normalize_options = normalize_options; +exports.parse = parse; diff --git a/packages/csv-parse/dist/cjs/stream.d.cts b/packages/csv-parse/dist/cjs/stream.d.cts new file mode 100644 index 00000000..6afa2959 --- /dev/null +++ b/packages/csv-parse/dist/cjs/stream.d.cts @@ -0,0 +1,23 @@ +import { Options } from "./index.cjs"; + +declare function parse(options?: Options): TransformStream; + +export { parse }; + +export { + CastingContext, // Deprecated + CastingFunction, + CastingDateFunction, + ColumnOption, + Options, + OptionsNormalized, + OptionsWithColumns, + Info, + InfoCallback, + InfoDataSet, + InfoRecord, + InfoField, + CsvErrorCode, + CsvError, + normalize_options, +} from "./index.cjs"; diff --git a/packages/csv-parse/package.json b/packages/csv-parse/package.json index ef2b0212..6f0d1d9d 100644 --- a/packages/csv-parse/package.json +++ b/packages/csv-parse/package.json @@ -113,7 +113,7 @@ "scripts": { "build": "npm run build:rollup && npm run build:ts", "build:rollup": "npx rollup -c", - "build:ts": "cp lib/index.d.ts dist/cjs/index.d.cts && cp lib/sync.d.ts dist/cjs/sync.d.cts && cp lib/*.ts dist/esm", + "build:ts": "cp lib/index.d.ts dist/cjs/index.d.cts && cp lib/sync.d.ts dist/cjs/sync.d.cts && cp lib/stream.d.ts dist/cjs/stream.d.cts && cp lib/*.ts dist/esm", "postbuild:ts": "find dist/cjs -name '*.d.cts' -exec sh -c \"sed -i \"s/\\.js'/\\.cjs'/g\" {} || sed -i '' \"s/\\.js'/\\.cjs'/g\" {}\" \\;", "lint:check": "eslint", "lint:fix": "eslint --fix", diff --git a/packages/csv-parse/rollup.config.js b/packages/csv-parse/rollup.config.js index e978b577..95bfabe0 100644 --- a/packages/csv-parse/rollup.config.js +++ b/packages/csv-parse/rollup.config.js @@ -99,4 +99,19 @@ export default [ nodeResolve(), ], }, + { + input: "lib/stream.js", + output: [ + { + file: `dist/cjs/stream.cjs`, + format: "cjs", + }, + ], + plugins: [ + // eslint({ + // fix: true, + // }), + nodeResolve(), + ], + }, ]; diff --git a/packages/csv-parse/test/api.stream.cjs_export.js b/packages/csv-parse/test/api.stream.cjs_export.js new file mode 100644 index 00000000..3c3e4fa0 --- /dev/null +++ b/packages/csv-parse/test/api.stream.cjs_export.js @@ -0,0 +1,19 @@ +import { strict as assert } from "node:assert"; +import { createRequire } from "node:module"; +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; + +const require = createRequire(import.meta.url); + +describe("API stream CommonJS export", function () { + it("loads the declared require target for csv-parse/stream", function () { + const pkg = JSON.parse( + readFileSync(new URL("../package.json", import.meta.url), "utf8"), + ); + const target = pkg.exports["./stream"].require.default; + const streamExport = require( + fileURLToPath(new URL(`..${target.slice(1)}`, import.meta.url)), + ); + assert.equal(typeof streamExport.parse, "function"); + }); +}); From e5a57411ddab387c70fcfe5dc1f7d65a8676093d Mon Sep 17 00:00:00 2001 From: YfengJ <166808804+YfengJ@users.noreply.github.com> Date: Sat, 20 Jun 2026 19:40:48 +0800 Subject: [PATCH 2/2] test: cover csv-parse stream in cjs demo --- demo/cjs/lib/parse_stream.js | 35 +++++++++++++++++++ demo/cjs/lib/parse_stream.ts | 35 +++++++++++++++++++ .../csv-parse/test/api.stream.cjs_export.js | 19 ---------- 3 files changed, 70 insertions(+), 19 deletions(-) create mode 100644 demo/cjs/lib/parse_stream.js create mode 100644 demo/cjs/lib/parse_stream.ts delete mode 100644 packages/csv-parse/test/api.stream.cjs_export.js diff --git a/demo/cjs/lib/parse_stream.js b/demo/cjs/lib/parse_stream.js new file mode 100644 index 00000000..653577c3 --- /dev/null +++ b/demo/cjs/lib/parse_stream.js @@ -0,0 +1,35 @@ +const assert = require("node:assert"); +const version = parseInt(/^v(\d+)/.exec(process.version)[1], 10); +const { parse } = require( + version >= 14 ? "csv-parse/stream" : "csv-parse/dist/cjs/stream.cjs", +); + +(async function () { + const output = []; + const parser = parse({ + delimiter: ":", + }); + const writer = parser.writable.getWriter(); + const reader = parser.readable.getReader(); + const encoder = new TextEncoder(); + const read = (async function () { + while (true) { + const { value, done } = await reader.read(); + if (done) break; + output.push(value); + } + })(); + + await writer.write(encoder.encode("a:b:c\n")); + await writer.write(encoder.encode("1:2:3\n")); + await writer.close(); + await read; + + assert.deepStrictEqual(output, [ + ["a", "b", "c"], + ["1", "2", "3"], + ]); +})().catch(function (err) { + console.error(err); + process.exitCode = 1; +}); diff --git a/demo/cjs/lib/parse_stream.ts b/demo/cjs/lib/parse_stream.ts new file mode 100644 index 00000000..e6b043ed --- /dev/null +++ b/demo/cjs/lib/parse_stream.ts @@ -0,0 +1,35 @@ +import assert from "node:assert"; +// Node.js >= 14 +import { parse } from "csv-parse/stream"; +// Node.js < 14 +// import { parse } from 'csv-parse/dist/cjs/stream' + +(async function () { + const output: string[][] = []; + const parser = parse({ + delimiter: ":", + }); + const writer = parser.writable.getWriter(); + const reader = parser.readable.getReader(); + const encoder = new TextEncoder(); + const read = (async function () { + while (true) { + const { value, done } = await reader.read(); + if (done) break; + output.push(value); + } + })(); + + await writer.write(encoder.encode("a:b:c\n")); + await writer.write(encoder.encode("1:2:3\n")); + await writer.close(); + await read; + + assert.deepStrictEqual(output, [ + ["a", "b", "c"], + ["1", "2", "3"], + ]); +})().catch(function (err: unknown) { + console.error(err); + process.exitCode = 1; +}); diff --git a/packages/csv-parse/test/api.stream.cjs_export.js b/packages/csv-parse/test/api.stream.cjs_export.js deleted file mode 100644 index 3c3e4fa0..00000000 --- a/packages/csv-parse/test/api.stream.cjs_export.js +++ /dev/null @@ -1,19 +0,0 @@ -import { strict as assert } from "node:assert"; -import { createRequire } from "node:module"; -import { readFileSync } from "node:fs"; -import { fileURLToPath } from "node:url"; - -const require = createRequire(import.meta.url); - -describe("API stream CommonJS export", function () { - it("loads the declared require target for csv-parse/stream", function () { - const pkg = JSON.parse( - readFileSync(new URL("../package.json", import.meta.url), "utf8"), - ); - const target = pkg.exports["./stream"].require.default; - const streamExport = require( - fileURLToPath(new URL(`..${target.slice(1)}`, import.meta.url)), - ); - assert.equal(typeof streamExport.parse, "function"); - }); -});