-
-
Save FennNaten/93a706c5dff1f39ebf701137f05f43bf to your computer and use it in GitHub Desktop.
Updated https://gist.github.com/sararob/275b252d1eda3a5baa27d6464d2f2198 to add an example of reducing memory usage by using file streams
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict';
/**
 * This is a fork of https://gist.github.com/sararob/275b252d1eda3a5baa27d6464d2f2198
 * with some code added to show how reading from and writing to the big source and
 * destination files can be done with streams to reduce memory pressure.
 * The added code illustrates the principles involved and is not tested, so it may contain bugs.
 */
// Dependencies
// NOTE(review): `require` takes no options object — the old google-cloud package
// exports a function that must be *called* with the config. The original passed the
// config as a second argument to require(), where it was silently ignored. Also
// fixed `keyfileName` -> `keyFilename`, the option name the SDK actually reads.
const gcloud = require('google-cloud')({
  projectId: 'sara-bigquery',
  keyFilename: 'keyfile.json'
});
const vision = gcloud.vision();
const fs = require('fs');
const async = require('async');
// To avoid writing our own line-emitting stream, use
// https://www.npmjs.com/package/line-by-line
const LineByLineReader = require('line-by-line');
// File writer into which we dump results as soon as we get them, so nothing
// accumulates in memory. See https://nodejs.org/api/stream.html#stream_writable_streams
const targetStream = fs.createWriteStream('./responses.json');
// The final format is a JSON array of objects pushed one by one,
// so open the array right away (it is closed in done() below).
targetStream.write('[');
// Feature set requested from the Vision API for every image.
const features = [
  'FACE_DETECTION',
  'LABEL_DETECTION',
  'LANDMARK_DETECTION',
  'WEB_DETECTION',
  'IMAGE_PROPERTIES'
].map((type) => ({ type }));
// Call the Vision API with our GCS image URL, then stream the annotation
// metadata for one image into the output file.
// `task` is one parsed line from image-ids.json; `callback` is the async.queue
// completion callback (called with an Error on failure, nothing on success).
function callVision(task, callback) {
  // NOTE(review): the task fields used here are inconsistent — `task.object_id`
  // for logging, `task.id` for the GCS URI, and `task.name` written back as
  // `object_id`. Verify against the producer (the line reader below) which of
  // these actually exists on a task object.
  console.log(`processing ${task.object_id}`);
  const visionReq = {
    image: {
      source: {
        imageUri: `gs://gcs-storage-bucket/${task.id}.jpg`
      }
    },
    features: features
  };
  vision.annotate(visionReq, function (err, annotations, resp) {
    if (err) {
      // Propagate the original error instead of wrapping an Error in an Error,
      // which would stringify it to an unhelpful message and lose the stack.
      return callback(err instanceof Error ? err : new Error(err));
    }
    const imgMetadata = annotations[0];
    if (!imgMetadata['imagePropertiesAnnotation']) {
      return callback(new Error('no data'));
    }
    imgMetadata['object_id'] = task.name;
    // Separate array items with commas — without this the output file would be
    // `[{...}{...}]`, which is not valid JSON. The flag lives on the function
    // itself so this block stays self-contained.
    if (callVision._wroteItem) {
      targetStream.write(',');
    }
    callVision._wroteItem = true;
    // Send the stringified object to the target file immediately — no need to
    // keep it in memory. `targetStream` is a module-level global here; if this
    // were a library, the stream would be injected into callVision instead.
    targetStream.write(JSON.stringify(imgMetadata));
    return callback();
  });
}
// The queue sends image tasks to callVision(), up to 20 in flight at once.
const q = async.queue(callVision, 20);

// Bookkeeping for a safe shutdown. The original comments noted the bug this
// fixes: the queue can drain while the reader is still feeding lines, in which
// case ending the stream on the first drain would truncate the output.
let allLinesRead = false;

// Close the output only once the source file is fully read AND the queue has
// no pending or in-flight work.
function done() {
  if (!allLinesRead || q.length() > 0 || q.running() > 0) {
    return;
  }
  q.drain = null;
  // Close the JSON array opened at the top of the file.
  targetStream.end(']');
}
// Executed every time the queue empties; done() decides whether to finish.
q.drain = done;

// Read task data from the source file line by line and feed the queue.
// NOTE(review): JSON.parse per line assumes each line is a standalone JSON
// document (NDJSON), despite the .json extension — confirm the file format.
const imageIdsStream = new LineByLineReader('./image-ids.json');
imageIdsStream.on('line', function (line) {
  q.push(JSON.parse(line), function (err) {
    if (err) {
      console.log(err);
    }
  });
});
imageIdsStream.on('end', function () {
  allLinesRead = true;
  // If the queue already drained before reading finished (or never received
  // work), finish now — drain will not fire again on an idle queue.
  if (q.idle()) {
    done();
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment