Skip to content

Instantly share code, notes, and snippets.

@ntantri
Last active July 3, 2019 17:45
Show Gist options
  • Save ntantri/f7236aec98e8df72446e9bf49b54a4db to your computer and use it in GitHub Desktop.
Save ntantri/f7236aec98e8df72446e9bf49b54a4db to your computer and use it in GitHub Desktop.
This script downloads files from S3 using an IAM assumed role for cross-account access.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import argparse
import boto3
def fetch_s3_client_object(credentials):
    """
    Build an S3 client from a set of temporary credentials.

    Parameters
    ----------
    credentials: mapping
        must provide the 'AccessKeyId', 'SecretAccessKey' and
        'SessionToken' entries
    Returns
    ----------
    s3: Object
        the client obtained from a session created with those credentials
    """
    temporary_session = boto3.session.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'],
    )
    return temporary_session.client('s3')
def fetch_objects(bucket, prefix, s3, throwerror):
    """
    Yield every key in a bucket that starts with the given prefix.

    This solution was available via: https://alexwlchan.net/2018/01/listing-s3-keys-redux/
    The listing is requested page by page until the response no longer
    carries a continuation token.

    Parameters
    ----------
    bucket: string
        takes the bucket name
    prefix: string or tuple of strings
        takes the prefix - either till the folder or till the entire file
        name. A single string is also sent to the listing call as a filter;
        a tuple is only matched locally against each returned key.
    s3: object
        client exposing ``list_objects_v2`` (for example a boto3 S3 client)
    throwerror: boolean or string
        when True, or the text 'true' in any letter case, a listing page
        without 'Contents' raises instead of ending the iteration quietly
        (useful for validation)
    Yields
    ------
    string
        each key found under the prefix, in the order the pages return them
    Raises
    ------
    Exception
        if a page has no 'Contents' entry and throwerror is enabled
    """
    print('Fetching s3 files from bucket ' + str(bucket)
          + ' and prefix ' + str(prefix))
    # The command line hands this flag over as text while other callers may
    # pass a real boolean, so both spellings are honoured.
    must_exist = throwerror is True or str(throwerror).lower() == 'true'
    kwargs = {'Bucket': bucket}
    # If the prefix is a single string (not a tuple of strings), we can
    # do the filtering directly in the S3 API.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix
    while True:
        # The S3 API response is a large blob of metadata.
        # 'Contents' contains information about the listed objects.
        resp = s3.list_objects_v2(**kwargs)
        try:
            contents = resp['Contents']
        except KeyError:
            if must_exist:
                # str() keeps the message buildable for tuple prefixes too.
                raise Exception('Could not find data for prefix: '
                                + str(prefix))
            return
        for obj in contents:
            key = obj['Key']
            # Still needed for tuple prefixes, which are not filtered remotely.
            if key.startswith(prefix):
                yield key
        # The S3 API is paginated, returning up to 1000 keys at a time.
        # Pass the continuation token into the next request, until we
        # reach the final page (when this field is missing).
        try:
            kwargs['ContinuationToken'] = resp['NextContinuationToken']
        except KeyError:
            break
def fetch_s3_credentials_assume_role(account_to_assume, name_of_role):
    """
    Assume an IAM role in another account and return its temporary credentials.

    Parameters
    ----------
    account_to_assume: string
        account part of the role ARN (placed after 'arn:aws:iam::')
    name_of_role: string
        role name part of the role ARN (placed after ':role/')
    Returns
    -------
    object
        the 'Credentials' entry of the assume_role response
    """
    # create an STS client object that represents a live connection to the
    # STS service
    sts_client = boto3.client('sts')
    role_arn = 'arn:aws:iam::' + account_to_assume + ':role/' + name_of_role
    print(role_arn)
    # Call assume_role with the role ARN and a role session name.
    assumed_role_object = sts_client.assume_role(
        RoleArn=role_arn,
        RoleSessionName='AssumeRoleSessio')
    # From the response that contains the assumed role, get the temporary
    # credentials that can be used to make subsequent API calls
    return assumed_role_object['Credentials']
def download_s3_files(bucket, prefix, account, role_name, throwerror):
    """
    Downloads the files from a given prefix.

    Every matching object is saved under the last '/'-separated segment of
    its key, so two keys that share that segment are written to the same
    local file name.

    Parameters
    ----------
    bucket: string
        Bucket information like my-bucket
    prefix: string
        takes the prefix - either till the folder or till the entire file name.
    account: string
        Which IAM account number
    role_name: string
        The role name which is going to be used for connectivity
    throwerror: boolean
        this boolean value depicts whether to throw exception in case there
        is any missing data (useful for validation)
    """
    credentials = fetch_s3_credentials_assume_role(account, role_name)
    s3_client = fetch_s3_client_object(credentials)
    for key in fetch_objects(bucket, prefix, s3_client, throwerror):
        file_name = key.split('/')[-1]
        if not file_name:
            # A key ending in '/' leaves no file name to write to, so there
            # is nothing meaningful to download for it.
            continue
        print('Downloading the file: {}'.format(file_name))
        s3_client.download_file(bucket, key, file_name)
def fetch_args():
    """
    Build the argument parser describing every option this script accepts.

    Each option has a short and a long spelling; the parsed value is stored
    under the short letter (``args.b``, ``args.p``, ``args.a``, ``args.r``,
    ``args.e``). Bucket, prefix, account and role are mandatory, so leaving
    one out produces a usage error at parse time.

    Returns
    -------
    argparse.ArgumentParser
        the configured parser; call ``parse_args()`` on it
    """
    parser = argparse.ArgumentParser(
        description='Provide details: example python '
                    'download_from_s3_via_assume_role.py '
                    '-b "bucket-name" -p "path/to/file/" '
                    '-a "account-name-or-id" -r "role-name" ')
    parser.add_argument(
        '-b', '--bucket', dest='b', metavar='BUCKET', required=True,
        help='''Provide the bucket name, for example: bucket-name ''')
    parser.add_argument(
        '-p', '--prefix', dest='p', metavar='PREFIX', required=True,
        help='''Provide the prefix - till the folder, for example: path/to/file/mysample-file.tsv.gz''')
    parser.add_argument(
        '-a', '--account', dest='a', metavar='ACCOUNT', required=True,
        help='''Account name or id''')
    parser.add_argument(
        '-r', '--role', dest='r', metavar='ROLE', required=True,
        help='''Role which needs to be used for connectivity''')
    parser.add_argument(
        '-e', '--error', dest='e', metavar='ERROR',
        help='''Throw error if there is a problem with file - true/false''')
    return parser
if __name__ == '__main__':
    # Parse the command line and hand the five option values straight to the
    # downloader: bucket, prefix, account, role and the error flag.
    cli_args = fetch_args().parse_args()
    download_s3_files(cli_args.b, cli_args.p, cli_args.a, cli_args.r,
                      cli_args.e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment