Last active
February 9, 2022 22:19
-
-
Save dsmrt/5e81c82e68395da8ba662a86a4dbdf01 to your computer and use it in GitHub Desktop.
S3 Select Nodejs example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Dependency: install the AWS SDK v3 S3 client before running this file.
//   npm i @aws-sdk/client-s3
const s3lib = require("@aws-sdk/client-s3");
const util = require("util");

// S3 client created once at module load. The region is read from AWS_REGION
// and falls back to "us-east-1" when that variable is unset or empty.
const s3 = new s3lib.S3Client({
  region: process.env.AWS_REGION || "us-east-1",
});
// Decodes an object key as delivered in an S3 event notification, where the
// key is URL-encoded and spaces arrive as "+".
const decodeS3ObjectKey = (rawKey) => {
  try {
    return decodeURIComponent(rawKey.replace(/\+/g, " "));
  } catch (err) {
    if (err instanceof URIError) {
      // Not valid percent-encoding, so treat the key as already literal.
      return rawKey;
    }
    throw err;
  }
};

// Drains a SelectObjectContent event stream and returns the concatenation of
// every Records chunk, decoded as UTF-8.
const collectSelectRecords = async (payload) => {
  // One streaming decoder for the whole response: a chunk may end in the
  // middle of a multi-byte character, which a per-chunk decoder would mangle.
  const decoder = new util.TextDecoder("utf-8");
  let text = "";
  for await (const selectEvent of payload) {
    // Records.Payload is a buffer holding a single record, partial records,
    // or multiple records, so chunks must be appended rather than replaced.
    if (selectEvent.Records && selectEvent.Records.Payload) {
      text += decoder.decode(selectEvent.Records.Payload, { stream: true });
    }
    // Other event types (Stats, End, ...) carry no record data.
  }
  // Flush any bytes the decoder is still holding.
  return text + decoder.decode();
};

/**
 * Runs an S3 Select query against the object named in a posted S3 event and
 * returns the query result as the API response body.
 *
 * Based on requirements ...
 * 1. This expects an API Gateway event with a string body of the S3 event data.
 * 2. CSV file type is assumed and the payload is returned in CSV format.
 *    Configure the S3 Select parameters as needed.
 * 3. Build SQL to query the file/object.
 * 4. Send the API call and get the results.
 * 5. Pull the response from the stream.
 * 6. Return the S3 Select payload as an API response.
 *
 * Note: IAM permissions needed -> s3:GetObject on arn:aws:s3:::bucket_name/key_name
 *
 * @param {{ body: string }} event - API Gateway event; `body` is the JSON text of an S3 event.
 * @returns {Promise<{ statusCode: number, body: string }>} 200 with the CSV
 *   result, or 400 when the body is not valid JSON or names no usable record.
 * @throws Errors raised by the S3 call itself are not caught and propagate to the caller.
 */
const handler = async (event) => {
  // 1. event.body assumes the API Gateway has posted the contents of an S3 event (s3 create) to the endpoint
  let s3Event;
  try {
    s3Event = JSON.parse(event ? event.body : undefined);
  } catch (err) {
    if (err instanceof SyntaxError) {
      return {
        statusCode: 400,
        body: "Invalid JSON body",
      };
    }
    throw err;
  }

  // pull the first record (there should only be 1) without mutating the parsed event
  const records = s3Event && Array.isArray(s3Event.Records) ? s3Event.Records : [];
  const firstRecord = records[0];
  // return error if the firstRecord is undefined
  if (!firstRecord) {
    // TODO: decide what else needs to happen when the event has no records
    return {
      statusCode: 400,
      body: "No records found",
    };
  }

  const s3Info = firstRecord.s3 || {};
  const bucketName = s3Info.bucket ? s3Info.bucket.name : undefined;
  const rawKey = s3Info.object ? s3Info.object.key : undefined;
  if (typeof bucketName !== "string" || typeof rawKey !== "string") {
    return {
      statusCode: 400,
      body: "Record is missing the S3 bucket name or object key",
    };
  }

  // 2. configure parameters for parsing CSV
  // more info on the API: https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html
  const params = {
    Bucket: bucketName,
    // Event notification keys are URL-encoded; S3 needs the literal key.
    Key: decodeS3ObjectKey(rawKey),
    // 3. use SQL to query the object/file
    // more info here: https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-glacier-select-sql-reference-select.html
    // This example SQL pulls 10 records from the file
    Expression: "select * from S3Object limit 10", // TODO: replace with the real query
    ExpressionType: "SQL",
    InputSerialization: {
      CSV: {
        FileHeaderInfo: "USE", // TODO: these CSV settings are optional
        RecordDelimiter: "\n",
        FieldDelimiter: ",",
        QuoteCharacter: '"',
        QuoteEscapeCharacter: '"',
        CommentCharacter: "#",
        RecordEncoding: "UTF-8",
        FieldEncoding: "UTF-8",
      },
    },
    OutputSerialization: {
      CSV: {
        QuoteFields: "ALWAYS",
        QuoteEscapeCharacter: '"',
        RecordDelimiter: "\n",
        FieldDelimiter: ",",
        QuoteCharacter: '"',
      },
    },
  };

  // 4. send api call
  const result = await s3.send(new s3lib.SelectObjectContentCommand(params));

  // 5. pull payload from stream
  const data = result.Payload ? await collectSelectRecords(result.Payload) : "";

  return {
    statusCode: 200,
    // 6. return s3 select payload as an api response
    body: data,
  };
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment