Last active
July 3, 2019 17:45
-
-
Save ntantri/f7236aec98e8df72446e9bf49b54a4db to your computer and use it in GitHub Desktop.
This script downloads files from S3 using an IAM assumed role for cross-account access.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import argparse | |
import boto3 | |
def fetch_s3_client_object(credentials):
    """
    Build an S3 client that authenticates with the given credentials.

    Parameters
    ----------
    credentials: mapping
        Must provide the 'AccessKeyId', 'SecretAccessKey' and
        'SessionToken' entries; all three are read below.

    Returns
    -------
    s3: Object
        The S3 client created from a boto3 session that carries those
        credentials.
    """
    temporary_session = boto3.session.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'],
    )
    return temporary_session.client('s3')
def fetch_objects(bucket, prefix, s3, throwerror):
    """
    Yield the key of every object in a bucket that starts with a prefix.

    This solution was available via:
    https://alexwlchan.net/2018/01/listing-s3-keys-redux/

    Parameters
    ----------
    bucket: string
        Name of the bucket to list.
    prefix: string or tuple of strings
        Only keys starting with this value are yielded. A single string is
        also passed to the listing call as 'Prefix'; a tuple is applied
        only to the keys that come back.
    s3: object
        S3 client object; only its ``list_objects_v2`` method is used.
    throwerror: boolean or string
        When true - either ``True`` or the text 'true' in any letter case -
        an exception is raised if a listing response has no 'Contents'
        entry (useful for validation). Any other value ends the listing
        quietly in that situation.

    Yields
    ------
    string
        Each matching key, in the order the listing returns them.

    Raises
    ------
    Exception
        If a response has no 'Contents' entry and ``throwerror`` is true.
    """
    print('Fetching s3 files from bucket ' + str(bucket)
          + ' and prefix ' + str(prefix))

    # The flag arrives as text from the command line but is documented as a
    # boolean, so accept both spellings instead of only the string 'true'.
    fail_on_missing = str(throwerror).lower() == 'true'

    kwargs = {'Bucket': bucket}
    # If the prefix is a single string (not a tuple of strings), we can
    # do the filtering directly in the S3 API.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    while True:
        # The S3 API response is a large blob of metadata.
        # 'Contents' contains information about the listed objects.
        resp = s3.list_objects_v2(**kwargs)
        if 'Contents' not in resp:
            if fail_on_missing:
                # str() keeps the message buildable for a tuple prefix.
                raise Exception('Could not find data for prefix: '
                                + str(prefix))
            return

        for obj in resp['Contents']:
            key = obj['Key']
            if key.startswith(prefix):
                yield key

        # The S3 API is paginated, returning up to 1000 keys at a time.
        # Pass the continuation token into the next request, until we
        # reach the final page (when this field is missing).
        if 'NextContinuationToken' not in resp:
            break
        kwargs['ContinuationToken'] = resp['NextContinuationToken']
def fetch_s3_credentials_assume_role(account_to_assume, name_of_role):
    """
    Assume an IAM role through STS and return its temporary credentials.

    Parameters
    ----------
    account_to_assume: string
        Account part of the role ARN that is assembled below.
    name_of_role: string
        Role name part of the role ARN that is assembled below.

    Returns
    -------
    object
        The 'Credentials' entry of the ``assume_role`` response.
    """
    # Create an STS client object that represents a live connection to the
    # STS service.
    sts_client = boto3.client('sts')

    # Call the assume_role method of the STS client and pass the role
    # ARN and a role session name.
    role_arn = 'arn:aws:iam::' + account_to_assume + ':role/' \
        + name_of_role
    # Single-argument print call: same output on Python 2 and Python 3.
    print(role_arn)
    assumed_role_object = sts_client.assume_role(
        RoleArn=role_arn,
        RoleSessionName='AssumeRoleSessio',
    )

    # From the response that contains the assumed role, get the temporary
    # credentials that can be used to make subsequent API calls.
    return assumed_role_object['Credentials']
def download_s3_files(bucket, prefix, account, role_name, throwerror):
    """
    Download every object found under a prefix.

    Each object is saved under the last '/'-separated segment of its key,
    so keys that share a last segment are written to the same local name.

    Parameters
    ----------
    bucket: string
        Bucket information like my-bucket
    prefix: string
        Takes the prefix - either till the folder or till the entire file
        name.
    account: string
        Which IAM account number
    role_name: string
        The role name which is going to be used for connectivity
    throwerror: boolean
        Passed through to ``fetch_objects``; depicts whether to throw an
        exception in case there is any missing data (useful for
        validation)
    """
    credentials = fetch_s3_credentials_assume_role(account, role_name)
    s3_client = fetch_s3_client_object(credentials)

    for key in fetch_objects(bucket, prefix, s3_client, throwerror):
        file_name = key.split("/")[-1]
        if not file_name:
            # A key ending in '/' has an empty last segment, so there is
            # no local file name to download it to.
            continue
        # Fill the placeholder; the old statement printed a literal '{}'.
        print("Downloading the file: {}".format(file_name))
        s3_client.download_file(bucket, key, file_name)
def fetch_args():
    """
    Build the command line parser for this script.

    Every option has a short and a long spelling. The parsed values stay
    available under the short names (``args.b``, ``args.p``, ``args.a``,
    ``args.r``, ``args.e``), and every option defaults to ``None`` when it
    is not supplied.

    Returns
    -------
    argparse.ArgumentParser
        The configured parser; the caller runs ``parse_args`` on it.
    """
    parser = argparse.ArgumentParser(
        description='''Provide details: example python download_from_s3_via_assume_role.py
        -b "bucket-name" -p "path/to/file/" -a "account-name-or-id" -r "role-name" '''
    )
    # The long spellings used to be passed as metavar, which only changed
    # the help text; register them as real options and pin dest so the
    # short attribute names keep working.
    parser.add_argument('-b', '--bucket', dest='b', metavar='BUCKET',
                        help='''Provide the bucket name, for example: bucket-name ''')
    parser.add_argument('-p', '--prefix', dest='p', metavar='PREFIX',
                        help='''Provide the prefix - till the folder, for example: path/to/file/mysample-file.tsv.gz''')
    parser.add_argument('-a', '--account', dest='a', metavar='ACCOUNT',
                        help='''Account name or id''')
    parser.add_argument('-r', '--role', dest='r', metavar='ROLE',
                        help='''Role which needs to be used for connectivity''')
    parser.add_argument('-e', '--error', dest='e', metavar='ERROR',
                        help='''Throw error if there is a problem with file - true/false''')
    return parser
if __name__ == '__main__':
    # Script entry point: parse the command line and start the download.
    parser = fetch_args()
    args = parser.parse_args()

    # An option that was not supplied parses to None. Report that here
    # rather than letting None reach the string handling further down.
    missing = [flag for flag, value in (('-b', args.b), ('-p', args.p),
                                        ('-a', args.a), ('-r', args.r))
               if value is None]
    if missing:
        parser.error('missing required argument(s): ' + ', '.join(missing))

    bucket = args.b
    prefix = args.p
    account = args.a
    role_name = args.r
    throwerror = args.e
    download_s3_files(bucket, prefix, account, role_name, throwerror)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment