Last active
November 28, 2024 12:12
-
-
Save PhiLhoSoft/ff60eefcb8ed43326cd7 to your computer and use it in GitHub Desktop.
Parse CSV file with Node.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Area Type | Town name | number value | region | sub-region | |
---|---|---|---|---|---|
Unknown Area Type | Hlegu | 1511.19895194 | Yangon | Yangon, (North) | |
Unknown Area Type | Cocokyun | 33.8113207395 | Yangon | Yangon, (South) | |
Unknown Area Type | Mese | 1818.94431751 | Kayah | Bawlake |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Parses a CSV file.
// If told it has a header, the callback is called with an object (per line) whose keys are the header names.
// Otherwise, the callback is called with an array (per line) holding the extracted values,
// optionally restricted to the requested field indexes.
//
// Made to run on the Node.js platform.
// Should accept more encodings, using a conversion library.
var fs = require('fs'); | |
// http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/ | |
// https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7 | |
/**
 * Streams a CSV file from disk and reports its rows one by one.
 *
 * Rows are separated by any run of CR / LF characters, so blank lines are skipped and
 * a quoted field cannot span several lines. Fields are separated by commas; a field may
 * be wrapped in double quotes to contain commas, and a doubled quote ("") inside a quoted
 * field stands for one literal quote.
 *
 * @param {string} fileName - Path of the CSV file to read.
 * @param {Object} [options] - Parsing options.
 * @param {Object|string} [options.readOptions] - Passed as is to fs.createReadStream.
 *     When it specifies no encoding, the stream is decoded as UTF-8.
 * @param {boolean} [options.hasHeader] - When truthy, the first non-empty line gives the
 *     column names and each following row is reported as an object keyed by these names.
 *     Columns whose header name is empty or missing are left out of the object.
 * @param {number[]} [options.fieldIndexes] - Only used without hasHeader: zero-based column
 *     indexes to keep. The value of column fieldIndexes[i] is stored at position i of the
 *     reported array. When omitted, all the columns are reported, in file order.
 * @param {Function} onNext - Called once per data row with the object or the array.
 * @param {Function} [onComplete] - Called without argument once the file is fully read.
 * @returns {fs.ReadStream} The underlying stream, so that the caller can attach an
 *     'error' listener (no such listener is installed here).
 */
function parseCsvFile(fileName, options, onNext, onComplete)
{
    var opts = options || {};
    var readOptions = opts.readOptions;
    var headerNames = null; // Column names, set from the first non-empty line when hasHeader is on
    var pending = '';       // Last, possibly incomplete, line of the previous chunk

    var stream = fs.createReadStream(fileName, readOptions);

    // Let the stream do the decoding: converting each Buffer chunk separately would break
    // a multi-byte character that straddles two chunks.
    var hasEncoding = typeof readOptions === 'string' ||
            Boolean(readOptions && readOptions.encoding);
    if (!hasEncoding)
    {
        stream.setEncoding('utf8');
    }

    stream.on('data', function (chunk)
    {
        // Prepend the remainder of the previous chunk, then cut the content in lines
        var lines = (pending + chunk).split(/[\r\n]+/);
        // The last piece may be a partial line: keep it for the next chunk (or for 'end')
        pending = lines.pop();
        lines.forEach(function (line)
        {
            processLine(line);
        });
    });
    stream.on('end', function ()
    {
        // Whatever remains is the last line of a file not ending with a line break
        processLine(pending);
        pending = '';
        if (onComplete)
        {
            onComplete();
        }
    });
    return stream;

    // Dispatches one line: header capture, record building or field extraction.
    function processLine(line)
    {
        if (line === '')
            return; // Skip empty lines
        var values = splitFields(line);
        if (!opts.hasHeader)
        {
            onNext(selectFields(values));
        }
        else if (headerNames === null)
        {
            // The first non-empty line is the header, wherever it sits in the chunk
            headerNames = values;
        }
        else
        {
            onNext(buildRecord(values));
        }
    }

    // Splits one line in unquoted field values.
    function splitFields(line)
    {
        var fields = [], current = '', inQuotes = false;
        for (var i = 0; i < line.length; i++)
        {
            var ch = line.charAt(i);
            if (inQuotes)
            {
                if (ch !== '"')
                {
                    current += ch;
                }
                else if (line.charAt(i + 1) === '"')
                {
                    current += '"'; // Escaped quote
                    i++;
                }
                else
                {
                    inQuotes = false;
                }
            }
            else if (ch === '"')
            {
                inQuotes = true;
            }
            else if (ch === ',')
            {
                fields.push(current);
                current = '';
            }
            else
            {
                current += ch;
            }
        }
        fields.push(current);
        return fields;
    }

    // Maps the values of a row to the header names.
    function buildRecord(values)
    {
        var record = {};
        values.forEach(function (value, column)
        {
            var name = headerNames[column];
            if (name !== undefined && name !== '')
            {
                record[name] = value;
            }
        });
        return record;
    }

    // Keeps only the columns listed in fieldIndexes (all of them if the option is not set).
    function selectFields(values)
    {
        if (opts.fieldIndexes === undefined)
            return values;
        var selected = [];
        values.forEach(function (value, column)
        {
            var position = opts.fieldIndexes.indexOf(column);
            if (position !== -1)
            {
                selected[position] = value;
            }
        });
        return selected;
    }
}
module.exports = parseCsvFile; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Command-line driver: parses a CSV file and prints each record.
//
// Made to run on the Node.js platform.
// Should accept more encodings, using a conversion library.
var fs = require('fs'); | |
var parseCsvFile = require('./ParseCsvFile'); | |
// Command-line driver: prints each row of the given CSV file, then 'Done'.
// process.argv[0] is the node executable, [1] the path of this script,
// [2] the input file path (given without its '.csv' extension, which is appended below)
// and [3] the optional encoding.
var args = process.argv;
var inputFileName = args[2];
if (inputFileName === undefined)
{
    console.log('Usage: ' + args[0] + ' ' + args[1] + ' filePath [encoding]');
}
else
{
    // The encoding directly follows the file path, as stated by the usage message
    var encoding = args[3] || 'utf8'; // 'ascii' or 'utf8'
    var inputCsv = inputFileName + '.csv';

    var options = { readOptions: { encoding: encoding } };
    //~ options.hasHeader = true;
    //~ options.fieldIndexes = [ 1, 3, 4 ];

    parseCsvFile(inputCsv, options,
        function onNext(record)
        {
            console.log(record);
        },
        function onComplete()
        {
            console.log('Done');
        }
    );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment