-
-
Save nerycordova/5cf0e169d330d8fbba85529d14907d31 to your computer and use it in GitHub Desktop.
//Dev.to article: https://dev.to/nerycordova/unzip-large-files-in-aws-using-lambda-and-node-js-cpp | |
const AWS = require("aws-sdk"); | |
const s3 = new AWS.S3({ apiVersion: "2006-03-01" }); | |
const unzipper = require("unzipper"); | |
exports.handler = async (event) => { | |
//...initialize bucket, filename and target_filename here | |
try { | |
/** | |
* Step 1: Get stream of the file to be extracted from the zip | |
*/ | |
const file_stream = s3 | |
.getObject({ Bucket: bucket, Key: filename }) | |
.createReadStream() | |
.on("error", (e) => console.log(`Error extracting file: `, e)) | |
.pipe( | |
unzipper.ParseOne("file_name_inside_zip.ext", { | |
forceStream: true, | |
}) | |
); | |
/** | |
* Step 2: upload extracted stream back to S3: this method supports a readable stream in the Body param as per | |
* https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#upload-property | |
*/ | |
await s3 | |
.upload({ Bucket: bucket, Key: target_filename, Body: file_stream }) | |
.promise(); | |
} catch (error) { | |
console.log("Error: ", error.message, error.stack); | |
} | |
}; |
@bartoszgolebiowski super! Happy to read that you found it helpful.
Hello, I'm new to this and need some help. How do I get the filename of the file inside the zip?
unzipper.ParseOne("file_name_inside_zip.ext"
I won't know the name ahead of time. Then I want to stream that file back to another S3 bucket. This code does work, but I had to hard-code the unzipped_filename to get it working.
const unzipped_filename = "test.csv";
.pipe(unzipper.ParseOne(unzipped_filename, {forceStream: true,}));
.upload({ Bucket: target_bucket, Key: unzipped_filename, Body: file_stream })
I am getting the error below in my CloudWatch logs:
2022-09-23T12:08:59.141Z undefined ERROR Uncaught Exception {
"errorType": "Runtime.ImportModuleError",
"errorMessage": "Error: Cannot find module 'unzipper'\nRequire stack:\n- /var/task/index.js\n- /var/runtime/index.mjs",
"stack": [
"Runtime.ImportModuleError: Error: Cannot find module 'unzipper'",
"Require stack:",
"- /var/task/index.js",
"- /var/runtime/index.mjs",
" at _loadUserApp (file:///var/runtime/index.mjs:951:17)",
" at async Object.UserFunction.js.module.exports.load (file:///var/runtime/index.mjs:976:21)",
" at async start (file:///var/runtime/index.mjs:1137:23)",
" at async file:///var/runtime/index.mjs:1143:1"
]
}
@saswatapurohit seems that this module is not included in your lambda package. Make sure that you're packing all your libraries before pushing the code to your Lambda.
Hello, I'm new to this and need some help. How do I get the filename of the file inside the zip? unzipper.ParseOne("file_name_inside_zip.ext" I won't know the name ahead of time. Then I want to stream that file back to another S3 bucket. This code does work, but I had to hard-code the unzipped_filename to get it working. const unzipped_filename = "test.csv"; .pipe(unzipper.ParseOne(unzipped_filename, {forceStream: true,})); .upload({ Bucket: target_bucket, Key: unzipped_filename, Body: file_stream })
@tdough21 hope you were able to solve this back in May. In case not, and for the record: in the docs you will see that most of the examples have this condition: if (fileName === "this IS the file I'm looking for").
See this one, for example:
// Read the archive from disk and parse it as a stream of entries.
const archive = fs.createReadStream('path/to/archive.zip');
const zip = archive.pipe(unzipper.Parse({ forceStream: true }));
for await (const entry of zip) {
  // `type` is 'Directory' or 'File'.
  const { path: fileName, type } = entry;
  // `entry.vars` also exposes compressedSize.
  const size = entry.vars.uncompressedSize;
  if (fileName === "this IS the file I'm looking for") {
    // Wanted entry: write its contents to the output path.
    entry.pipe(fs.createWriteStream('output/path'));
  } else {
    // Unwanted entry: discard its contents.
    entry.autodrain();
  }
}
So, by design, the library allows you to get all file names inside the .zip package.
Thanks, this is what I am looking for. Thank you!