Skip to content

Instantly share code, notes, and snippets.

@mikeapted
Last active November 4, 2019 12:53
Show Gist options
  • Save mikeapted/1b32912d8becbfcf9fc66467ff5285e0 to your computer and use it in GitHub Desktop.
Save mikeapted/1b32912d8becbfcf9fc66467ff5285e0 to your computer and use it in GitHub Desktop.
Custom R runtime for AWS Lambda with support for MSSQL database connections

Download repo from Appsilon

git clone https://github.com/Appsilon/r-lambda-workflow
cd r-lambda-workflow

Set up a Python virtual environment (optional, but keeps the dependencies isolated)

python3 -m venv r-lambda-env
source r-lambda-env/bin/activate

Install paramiko library

pip install paramiko

Make sure the AWS CLI is configured with your credentials, then set your default region

aws configure set default.region ca-central-1

Create an EC2 key pair and protect it

aws ec2 create-key-pair --key-name r-lambda --query 'KeyMaterial' --output text >> r-lambda.pem
chmod 600 r-lambda.pem

Allow SSH access from your current IP address in the default security group

aws ec2 authorize-security-group-ingress \
    --group-name default \
    --protocol tcp \
    --port 22 \
    --cidr $(curl -s http://checkip.amazonaws.com/)/32

Replace the build_r.sh script with the one in this Gist

Create an R runtime base layer (this will take ~5 mins)

./setup_r_instance.py -k r-lambda.pem -i t2.large
./build_runtime.sh

Publish the base layer to your AWS account

aws lambda publish-layer-version --layer-name r-lambda-base-layer --zip-file fileb://runtime.zip

Create an AMI for use with package layer creation

./setup_r_instance.py -k r-lambda.pem -i t2.large -a create_ami -n r-lambda-ami

Create layer for R packages

./r_package_layer.py -k r-lambda.pem -i t2.large -m <ami-...> -p "lubridate, RODBC, plyr, jsonlite"

Publish the packages layer to your AWS account

aws lambda publish-layer-version --layer-name r-lambda-packages-layer --zip-file fileb://packages.zip
#!/bin/bash
# modified from https://github.com/bakdata/aws-lambda-r-runtime
#
# Builds R from source in /opt/R/, copies the shared libraries needed at
# runtime (Fortran/OpenMP/C++ plus the FreeTDS and unixODBC drivers used for
# MSSQL) into /opt/R/lib/, installs jsonlite, and packs the result as R.zip.
#
# Usage: build_r.sh <R version>      e.g. build_r.sh 3.5.1
set -euo pipefail

# Default to an empty string so that a missing argument reaches the check
# below instead of tripping `set -u` with an "unbound variable" error.
VERSION=${1:-}
if [ -z "$VERSION" ]; then
    echo 'version number required'
    exit 1
fi

# CRAN keeps sources under R-<major>/, so derive the directory from the
# requested version rather than hard-coding R-3.
R_MAJOR=${VERSION%%.*}
wget "https://cran.uni-muenster.de/src/base/R-${R_MAJOR}/R-${VERSION}.tar.gz"

# -p keeps a re-run from failing when the directory already exists.
sudo mkdir -p /opt/R/
sudo chown "$(whoami)" /opt/R/
tar -xf "R-${VERSION}.tar.gz"
mv "R-${VERSION}"/* /opt/R/

sudo yum install -y readline-devel \
    xorg-x11-server-devel libX11-devel libXt-devel \
    curl-devel \
    gcc-c++ gcc-gfortran \
    zlib-devel bzip2 bzip2-libs
# workaround for making R build work
# issue seems similar to https://stackoverflow.com/questions/40639138/configure-error-installing-r-3-3-2-on-ubuntu-checking-whether-bzip2-support-suf
sudo yum install -y R

cd /opt/R/
./configure --prefix=/opt/R/ --exec-prefix=/opt/R/ --with-libpth-prefix=/opt/ --without-recommended-packages
make

# Shared libraries the compiled R binaries are linked against.
cp /usr/lib64/libgfortran.so.3 lib/
cp /usr/lib64/libgomp.so.1 lib/
cp /usr/lib64/libquadmath.so.0 lib/
cp /usr/lib64/libstdc++.so.6 lib/

# MSSQL
sudo bash -c "curl https://packages.microsoft.com/config/rhel/6/prod.repo > /etc/yum.repos.d/mssql-release.repo"
sudo yum install -y freetds
# Quote the pattern so the shell hands it to yum untouched instead of
# expanding it against files in the current directory.
sudo yum install -y unixODBC unixODBC-devel --disablerepo='amzn*'
cp /usr/lib64/libtdsodbc.so.0 lib/
cp /usr/lib64/libodb* lib/

sudo yum install -y openssl-devel libxml2-devel
# Name the repository explicitly: picking a mirror by its position in the
# mirror list (ind=34) selects a different server whenever that list changes.
./bin/Rscript -e 'install.packages("jsonlite", repos = "https://cloud.r-project.org")'

zip -r -q R.zip bin/ lib/ lib64/ etc/ library/ doc/ modules/ share/
## Request mapping template for form-encoded POST bodies of the concat
## function. It splits $input.path('$') on '&' and each piece on '=',
## URL-decodes the key, and stores the URL-decoded value in $map when the key
## is one of foo, bar, baz or bat; a matching key with no '=value' part is
## stored as an empty string. The final line renders the collected values as
## a JSON object with foo/bar quoted and baz/bat unquoted.
## NOTE(review): values are inserted without JSON escaping, and a key that is
## absent from the body is never put into $map - confirm what the final line
## renders in those cases.
#set($map = {})
#foreach( $token in $input.path('$').split('&') )
#set( $keyVal = $token.split('=') )
#set( $keyValSize = $keyVal.size() )
#if( $keyValSize >= 1 )
#set( $key = $util.urlDecode($keyVal[0]) )
#foreach ( $item in ["foo", "bar", "baz", "bat"] )
#if( $key == $item)
#if( $keyValSize >= 2 )
$!map.put($item, $util.urlDecode($keyVal[1]))
#else
$!map.put($item, '')
#end
#end
#end
#end
#end{"foo":"$map.get('foo')","bar":"$map.get('bar')","baz":$map.get('baz'),"bat":$map.get('bat')}
# ODBC data source named MSSQLServer. Replace <database endpoint> with the
# host name of the SQL Server instance; Port is the TCP port to connect to.
# "Driver = FreeTDS" names a driver entry - presumably the [FreeTDS] section
# that defines the libtdsodbc.so.0 path; verify it is visible to unixODBC.
# NOTE(review): "TDS version = 0.95" matches the FreeTDS release mentioned in
# the driver description rather than a TDS protocol version - confirm the
# intended key name and value against the FreeTDS ODBC documentation.
[MSSQLServer]
Driver = FreeTDS
Description = FreeTDS
Trace = No
Server = <database endpoint>
Port = 1433
TDS version = 0.95
# ODBC driver entry named FreeTDS. Driver is the path of the FreeTDS ODBC
# shared library; /opt/R/lib/ is presumably where the runtime layer places
# the copied libtdsodbc.so.0 - TODO confirm against the deployed layer layout.
[FreeTDS]
Description = Freetds v 0.95
Driver = /opt/R/lib/libtdsodbc.so.0
## Request mapping template for form-encoded POST bodies of the plusone
## function. It splits $input.path('$') on '&' and each piece on '=',
## URL-decodes the key, and when the key is "x" stores the URL-decoded value
## in $x (or an empty string when there is no '=value' part). The final line
## renders {"x":<value>} with the value unquoted.
## NOTE(review): $x is only assigned when the body contains an "x" key -
## confirm what the final line renders when it does not.
#foreach( $token in $input.path('$').split('&') )
#set( $keyVal = $token.split('=') )
#set( $keyValSize = $keyVal.size() )
#if( $keyValSize >= 1 )
#set( $key = $util.urlDecode($keyVal[0]) )
#if( $key == "x")
#if( $keyValSize >= 2 )
#set( $x = $util.urlDecode($keyVal[1]) )
#else
#set( $x = '' )
#end
#end
#end
#end{"x":$x}
library(jsonlite)
library(RODBC)
# Database credentials, read from the process environment once when this
# script is loaded; hello() below uses them to build its connection string.
DB_USER <- Sys.getenv('DB_USER')
DB_PASS <- Sys.getenv('DB_PASS')
# Query the SQL Server behind the `MSSQLServer` ODBC DSN for its version.
#
# Arguments:
#   ...  Ignored; accepted so the handler can be called with any payload.
# Returns:
#   The result of `select @@VERSION`, serialised with toJSON().
# Errors:
#   Stops with a descriptive message when the connection cannot be opened.
hello <- function(...) {
  ch <- odbcDriverConnect(
    paste0("DSN={MSSQLServer};UID=", DB_USER, ";Pwd=", DB_PASS)
  )
  # odbcDriverConnect() reports failure by returning -1 (with a warning)
  # rather than by raising an error, so check the handle explicitly.
  if (!inherits(ch, "RODBC")) {
    stop("could not open ODBC connection to DSN 'MSSQLServer'", call. = FALSE)
  }
  # Registered before the query so the channel is closed even if it fails.
  on.exit(close(ch), add = TRUE)

  resultset <- sqlQuery(ch, "select @@VERSION")
  toJSON(resultset)
}
# Add one to `x`.
#
# Arguments:
#   x  Value to increment; coerced with as.numeric() first. Defaults to 1.
# Returns:
#   The numeric value of `x` plus one.
plusone <- function(x = 1) {
  incremented <- as.numeric(x) + 1
  incremented
}
# Join the four arguments into one space-separated string.
#
# Arguments:
#   foo, bar  Pasted as given; default to "".
#   baz, bat  Converted with as.character() before pasting; default to 1.
#   ...       Ignored; accepted so extra payload fields do not cause an error.
# Returns:
#   A single character string "<foo> <bar> <baz> <bat>".
concat <- function(foo = "", bar = "", baz = 1, bat = 1, ...) {
  # The value is returned directly; no local binding is needed for it.
  paste(foo, bar, as.character(baz), as.character(bat), sep = " ", collapse = "")
}
# Build a fixed three-row example data frame and serialise it.
#
# Returns:
#   The toJSON() output for a data frame with columns n (numeric),
#   s (strings) and b (logical).
dataframe <- function() {
  sample_rows <- data.frame(
    n = c(2, 3, 5),
    s = c("aa", "bb", "cc"),
    b = c(TRUE, FALSE, TRUE)
  )
  toJSON(sample_rows)
}
# Welcome to Serverless!
# Replace every <...> and xxxxxxxx placeholder below before deploying.
service: sls-lambda-r

# Deployment-specific values, referenced elsewhere in this file as
# ${self:custom.<name>}.
custom:
  region: <region>
  accountID: <account ID>
  # Name and version number of the published R runtime layer.
  runtimeLayer: <runtime layer>
  runtimeVersion: <runtime version>
  # Name and version number of the published R packages layer.
  packagesLayer: <package layer>
  packagesVersion: <package layer version>
  # VPC placement for the functions.
  securityGroupId: sg-xxxxxxxx
  subnet1: subnet-xxxxxxxx
  subnet2: subnet-xxxxxxxx
  # NOTE(review): these credentials are stored in plain text in this file -
  # confirm that is acceptable for your deployment.
  db_user: <username>
  db_pass: <password>
# Settings shared by every function in this service.
provider:
  name: aws
  role: arn:aws:iam::${self:custom.accountID}:role/lambda_basic_execution
  # Custom runtime: the R interpreter is supplied by the runtime layer.
  runtime: provided
  memorySize: 1024
  timeout: 30
  region: ${self:custom.region}
  vpc:
    securityGroupIds:
      - ${self:custom.securityGroupId}
    subnetIds:
      # Both configured subnets are listed; custom.subnet2 was previously
      # never referenced because subnet1 appeared twice.
      - ${self:custom.subnet1}
      - ${self:custom.subnet2}
  environment:
    # Directory unixODBC searches for its configuration files.
    ODBCSYSINI: /var/task/etc/
    # NOTE(review): ODBCINI normally names a file, not a directory - confirm.
    ODBCINI: /var/task/etc/
    DB_USER: ${self:custom.db_user}
    DB_PASS: ${self:custom.db_pass}
  apiKeys:
    - myApiKey
# One entry per handler in script.R. Every function attaches the runtime and
# packages layers and is exposed through an http event that is marked
# `private: true` and uses the `lambda` integration; the request templates
# build the JSON payload passed to the handler.
functions:
  hello:
    # `private` is an http-event setting, so it is declared only on the
    # event below and not at function level.
    handler: script.hello
    layers:
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.runtimeLayer}:${self:custom.runtimeVersion}
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.packagesLayer}:${self:custom.packagesVersion}
    events:
      - http:
          path: hello
          method: get
          private: true
          integration: lambda
          request:
            template:
              application/json: '{}'

  plusone:
    handler: script.plusone
    layers:
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.runtimeLayer}:${self:custom.runtimeVersion}
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.packagesLayer}:${self:custom.packagesVersion}
    events:
      - http:
          path: plusone
          method: get
          private: true
          integration: lambda
          request:
            template:
              # x is taken from the query string and inserted unquoted.
              application/json: '{"x":$input.params().querystring.x}'

  plusonepost:
    handler: script.plusone
    layers:
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.runtimeLayer}:${self:custom.runtimeVersion}
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.packagesLayer}:${self:custom.packagesVersion}
    events:
      - http:
          path: plusone
          method: post
          private: true
          integration: lambda
          request:
            template:
              application/json: '{"x":$input.path(''$.x'')}'
              # Form-encoded bodies are mapped by the external VTL template.
              application/x-www-form-urlencoded: ${file(plusone-post.vtl)}

  concat:
    handler: script.concat
    layers:
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.runtimeLayer}:${self:custom.runtimeVersion}
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.packagesLayer}:${self:custom.packagesVersion}
    events:
      - http:
          path: concat
          method: get
          private: true
          integration: lambda
          request:
            template:
              # foo and bar are quoted as strings; baz and bat are unquoted.
              application/json: '{"foo":"$input.params().querystring.foo","bar":"$input.params().querystring.bar","baz":$input.params().querystring.baz,"bat":$input.params().querystring.bat}'

  concatpost:
    handler: script.concat
    layers:
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.runtimeLayer}:${self:custom.runtimeVersion}
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.packagesLayer}:${self:custom.packagesVersion}
    events:
      - http:
          path: concat
          method: post
          private: true
          integration: lambda
          request:
            template:
              application/json: '{"foo":"$input.path(''$.foo'')","bar":"$input.path(''$.bar'')","baz":$input.path(''$.baz''),"bat":$input.path(''$.bat'')}'
              # Form-encoded bodies are mapped by the external VTL template.
              application/x-www-form-urlencoded: ${file(concat-post.vtl)}

  dataframe:
    handler: script.dataframe
    layers:
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.runtimeLayer}:${self:custom.runtimeVersion}
      - arn:aws:lambda:${self:custom.region}:${self:custom.accountID}:layer:${self:custom.packagesLayer}:${self:custom.packagesVersion}
    events:
      - http:
          path: dataframe
          method: get
          private: true
          integration: lambda
          request:
            template:
              application/json: '{}'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment