Last active
February 19, 2020 15:57
-
-
Save jkga/1719e8664c9214066a03eb3e2a0c7ed6 to your computer and use it in GitHub Desktop.
NodeJS : Reading bulk file using stream (ReadLine)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const path = require('path')
const fs = require('fs')
const readline = require('readline')

// Wall-clock and high-resolution start marks; both are reported on 'close'.
const start = new Date()
const hrstart = process.hrtime()

// The input file is expected to sit next to this script.
const file = path.resolve(__dirname, "log.txt")

/*
 * Reads `file` line by line through a readline interface, pausing the
 * interface after every handled line and resuming it on the next tick.
 *
 * pause() does not stop 'line' events that are already on their way, so a
 * number of lines still arrive while the interface is paused. Those lines
 * are collected in `streamExcess` so they can be handled together with the
 * line that triggered the pause instead of being lost.
 *
 * Throws (from inside the fs.stat callback, i.e. as an uncaught exception):
 *   - Error('File not found') when the path does not exist
 *   - the original fs error for any other stat failure
 */
fs.stat(file, (err) => {
  if (err !== null) {
    // Only a missing path is reported as "not found"; permission problems
    // and other I/O failures surface unchanged instead of being mislabelled.
    if (err.code === 'ENOENT') {
      throw new Error('File not found')
    }
    throw err
  }

  let lines = 0 // total number of 'line' events seen so far
  let cursor = 0 // value of `lines` at the most recent pause() call
  let stopped = false // true between a 'pause' event and the matching resume
  let streamExcess = [] // text of the lines delivered while paused

  const readInterface = readline.createInterface({
    input: fs.createReadStream(file)
  })

  readInterface.on('line', (line) => {
    lines++

    if (stopped) {
      // Delivered after pause(): keep the line text itself so it can still
      // be processed before the buffer is cleared below.
      streamExcess.push(line)
    } else {
      // Do all the work for the current `line` here.
      // . . .
      process.nextTick(() => {
        // Process `streamExcess` before this point, then drop it to avoid
        // duplicates and unbounded memory growth.
        streamExcess = []

        // Reset the state and let the interface deliver lines again.
        stopped = false
        readInterface.resume()
      })
    }

    // Mark the position of the ongoing operation, then ask for a pause.
    cursor = lines
    readInterface.pause()
  })

  readInterface.on('pause', () => {
    if (cursor === lines) {
      // The pause belongs to the line that was just handled: from here on
      // every delivered line is excess that has to be accumulated.
      // Note: even when pausing on the very first line the stream does not
      // stop right away but still yields a few hundred lines; log `lines`
      // in the excess branch of the 'line' handler to observe this.
      stopped = true
    }
  })

  readInterface.on('close', () => {
    // Log the execution time, both wall-clock and high-resolution.
    const hrend = process.hrtime(hrstart)
    console.info('Execution time: %dms', (new Date() - start))
    console.info('Execution time (hr): %ds %dms', hrend[0], hrend[1] / 1000000)
  })
})
// PS: This might be different on your tests but with mine, it was 4x faster than other npm modules.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment