-
-
Save jeroos/c4336e8e177ab634a91968431d8af4cd to your computer and use it in GitHub Desktop.
import json | |
import os | |
import duckdb | |
import boto3 | |
import datetime | |
from typing import Any, Dict | |
def construct_prepared_sql_and_params(sql_template, bind_params): | |
single_value_params = {k: v for k, v in bind_params.items() if not isinstance(v, list)} | |
list_params = {k: v for k, v in bind_params.items() if isinstance(v, list)} | |
for key in single_value_params.keys(): | |
sql_template = sql_template.replace(f"{{{key}}}", "?") | |
for key, values in list_params.items(): | |
placeholders = ', '.join(['?'] * len(values)) | |
sql_template = sql_template.replace(f"{{{key}}}", placeholders) | |
flat_params = [] | |
for key in bind_params: | |
if key in single_value_params: | |
flat_params.append(single_value_params[key]) | |
elif key in list_params: | |
flat_params.extend(list_params[key]) | |
return sql_template, flat_params | |
def serialize_dates(row): | |
for key, value in row.items(): | |
if isinstance(value, datetime.date): | |
row[key] = value.strftime('%d-%m-%Y') | |
return row | |
def lambda_handler(event: Dict[str, Any], context): | |
print("Duckdb Lambda received event: " + json.dumps(event, indent=4)) | |
file_name = event.get('file_name') | |
sql = event.get('sql') | |
bind_params = event.get('bind_params', {}) | |
return_type = event.get('return_type', 'List') | |
s3 = boto3.client('s3') | |
bucket_name = os.getenv('BUCKET_NAME') | |
local_file_name = f"/tmp/{file_name}" | |
os.makedirs(os.path.dirname(local_file_name), exist_ok=True) | |
s3.download_file(bucket_name, file_name, local_file_name) | |
conn = duckdb.connect(database=':memory:', read_only=False) | |
prepared_sql = sql % local_file_name | |
try: | |
prepared_sql, flat_params = construct_prepared_sql_and_params(prepared_sql, bind_params) | |
print(prepared_sql) | |
print(f"flat_params -> {flat_params}") | |
cursor = conn.execute(prepared_sql, parameters=flat_params) | |
columns = [description[0] for description in cursor.description] | |
fetched_results = [dict(zip(columns, row)) for row in cursor.fetchall()] | |
fetched_results = [serialize_dates(row) for row in fetched_results] | |
if return_type == 'Single': | |
results = fetched_results[0] if fetched_results else {} | |
else: | |
results = fetched_results | |
finally: | |
conn.close() | |
print("Duckdb Lambda processing completed") | |
return results |
Steps:
Create the Layer:
Follow the same steps as above to create a virtual environment and install the duckdb module.
Zip the contents of the virtual environment's site-packages directory.
bash
Copy code
cd myenv/lib/python3.*/site-packages
zip -r ../../../../duckdb_layer.zip .
Create a Lambda Layer in the AWS Console:
Go to the AWS Lambda console and navigate to the Layers section.
Click Create layer, give it a name, and upload the duckdb_layer.zip file.
Attach the Layer to Your Lambda Function:
In your Lambda function’s configuration, go to the Layers section and add the duckdb layer you just created.
@vinayakvanarse did you manage to get the layer for duck db working? I need some tips too, have the same error
Hi, did anyone manage to get this working please as currently I am facing the same issue.
Hi, did anyone manage to get this working please as currently I am facing the same issue.
I made 3.9 python layer x86_64 via Cloudshell in AWS console in the browser, and that works fine.
Hi @alexander-grun, thanks for your reply. Unfortunately I am still not able to make it work. Currently I am following the following steps.
- Go into AWS CloudShell
- Create a directory pythonTest2
mkdir pythonTest2
- Run the following to install the packages
pip3 install duckdb -t pythonTest2/
pip3 install duckdb -t pythonTest2/
- Zip the folder
zip -r9 pythonTest2.zip pythonTest2
- Upload the ZIP file to an S3 bucket
aws s3 cp pythonTest2.zip s3://d-lambdatest
- Create the lambda layer
- Assign the layer to the lambda function
Are these steps correct please or am I missing something?
Thanks!
@julian-calleja try also setting env variable in lambda as HOME:/tmp
The layer I made is here if you want to give it a try: arn:aws:lambda:eu-north-1:399679954749:layer:duck39:1
Hi @alexander-grun!
I just found this page, and I see that you shared your arn location. Can you please share the package itself in the zipped format, which can be uploaded to AWS Layers?
Thank you,
Balazs
Can I also get the zip? [email protected]
Here is a step-by-step guide on how to create your own Lambda Layer via Docker and upload it to AWS
https://alexander-grun.github.io/ag-blog/duck-db-aws-lambda-layer/
Thank you, I got the zip, I created a Layer with it, and attached to my function (it's strange for me, that I can't choose it from Custom Layers, only I can add with the ARN, but at least that works), and I'm still getting the same error:
Function Logs:
[ERROR] Runtime.ImportModuleError: Unable to import module 'lambda_function': No module named 'duckdb.duckdb'
Traceback (most recent call last):
The code:
import json
import duckdb
def lambda_handler(event, context):
# TODO implement
return {
'statusCode': 200,
'body': json.dumps('Hello from Lambda!')
}
If I repack it, without the "python" folder, so the subfolders right directly in the zip, I'm still getting the error
{
"errorMessage": "Unable to import module 'lambda_function': No module named 'duckdb'",
"errorType": "Runtime.ImportModuleError",
"requestId": "",
"stackTrace": []
}
@bbalazs-cubix in the environment variable of Lambda add Home: /tmp as a variable and try again
Thanks for the article, unfortunately that uses Glue, not Lambda, but it was interesting. Tried creating an environment variable called "Home" with the value "/tmp", but I'm getting the same import error. Trying to look for another solution.
Hi @jeroos, I am working on including DuckDB in 3.12 python version of lambda function. I am attaching a layer to the lambda function. I keep getting error in lambda that [ERROR] Runtime.ImportModuleError: Unable to import module 'lambda_function': No module named 'duckdb.duckdb'. Would you please guide how you were able to execute the lambda and if you used the layer then what were your steps. I know this is basic but I have spent a good amount of tries on my mac to get it working.