Created
May 18, 2018 03:02
-
-
Save IceCreamYou/a767bc34f89e177d990ae0b41ad6c576 to your computer and use it in GitHub Desktop.
A reasonably robust, relatively comprehensible CSV / DSV parser, <1 KB minified, for when you don't want to import a giant library. MIT Licensed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Optional settings accepted by `csvToArray`. Any option that is omitted
 * falls back to the default applied inside `csvToArray`.
 */
export type CSVOptions = Partial<{
  /** Delimiter between fields; compared against one input character at a time. */
  fieldSeparator: string;
  /** Delimiter between rows: `\r`, `\n`, or `\r\n`. */
  rowSeparator: string;
  /** Character that opens/closes a quoted field; doubled inside a quoted field to escape itself. */
  quote: string;
  /** Whether to strip leading/trailing whitespace from each parsed field. */
  trimFields: boolean;
  /** Whether to normalize line endings in the input to `rowSeparator` before parsing. */
  sanitizeRowSeparators: boolean;
}>;
/**
 * Converts a CSV / DSV string to an array of rows, each row being an array
 * of field strings.
 *
 * Adapted from http://code.google.com/p/csv-to-array/ v2.1 by Daniel Tillin
 * under the MIT License: https://opensource.org/licenses/mit-license.php
 *
 * Some test cases can be found at https://codepen.io/IceCreamYou/pen/JveBmy
 *
 * Behavior notes:
 * - An empty input yields `[['']]` (one row holding one empty field).
 * - A trailing row separator yields a final row `['']`.
 * - `fieldSeparator` and `quote` are compared against single input
 *   characters, so only one-character values can ever match.
 * - Only the first two characters of `rowSeparator` are considered.
 * - Line-ending normalization runs over the whole input before parsing, so
 *   line breaks inside quoted fields are rewritten to `rowSeparator` as well.
 * - Trimming is applied to the accumulated field text, including text that
 *   came from inside quotes.
 *
 * @param inputString
 *   The CSV file to parse, as a string.
 * @param options
 *   Options describing how to parse the CSV input:
 *   - fieldSeparator: The delimiter between fields (default `,`)
 *   - rowSeparator: The delimiter between rows; can be `\r`, `\n`, or `\r\n`
 *     (default `\n`)
 *   - quote: The field escape character (default `"`)
 *   - trimFields: Whether to ignore whitespace around fields (default `false`)
 *   - sanitizeRowSeparators: Whether to accept multiple line ending formats
 *     (`\r\n`, `\r`, and `\n`) by converting them all to `rowSeparator`
 *     (default `true`)
 * @returns
 *   The parsed rows, in input order.
 */
export function csvToArray(inputString: string, options: CSVOptions = {}): string[][] {
  const { fieldSeparator, rowSeparator, quote, trimFields, sanitizeRowSeparators } = {
    fieldSeparator: ',',
    rowSeparator: '\n',
    quote: '"',
    trimFields: false,
    sanitizeRowSeparators: true,
    ...options,
  };
  const rowSepChar1 = rowSeparator.charAt(0);
  const rowSepChar2 = rowSeparator.charAt(1);

  // Normalize every line-ending flavor (including bare "\n", which matters
  // when rowSeparator is "\r" or "\r\n"). A replacer function is used so
  // that "$" sequences in rowSeparator are inserted literally.
  const text = sanitizeRowSeparators
    ? inputString.replace(/\r\n|\r|\n/g, () => rowSeparator)
    : inputString;

  const rows: string[][] = [];
  let row: string[] = [];
  let field = '';
  let inQuotes = false;

  // Finish the field being accumulated and append it to the current row.
  const endField = (): void => {
    row.push(trimFields ? field.trim() : field);
    field = '';
  };
  // Finish the current field and row, then start a fresh row.
  const endRow = (): void => {
    endField();
    rows.push(row);
    row = [];
  };

  const length = text.length;
  for (let pos = 0; pos < length; pos++) {
    const ch = text.charAt(pos);

    if (ch === quote) {
      if (inQuotes && text.charAt(pos + 1) === quote) {
        // Escaped quote ("") inside a quoted field: emit one quote and
        // skip the second character of the pair.
        field += quote;
        pos++;
      } else {
        // Start or end a quoted field.
        inQuotes = !inQuotes;
      }
    } else if (ch === fieldSeparator) {
      if (inQuotes) {
        // Inside quotes the field separator is ordinary text.
        field += ch;
      } else {
        endField();
      }
    } else if (ch === rowSepChar1) {
      if (!inQuotes && (!rowSepChar2 || rowSepChar2 === text.charAt(pos + 1))) {
        endRow();
        // For a two-character separator (\r\n) the current character is the
        // first half; skip the second half.
        if (rowSepChar2) pos++;
      } else {
        // Inside quotes, or only a partial match of a two-character
        // separator: treat the character as ordinary text.
        field += ch;
      }
    } else {
      // Non-special character; add it to the field.
      field += ch;
    }
  }

  // Flush the last field/row. This also trims the final field, which is
  // never followed by a separator.
  endRow();
  return rows;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment