ntantri · July 3, 2019 17:45
diff --git a/download_s3_files_cross_account_iam.py b/download_s3_files_cross_account_iam.py
 #!/usr/bin/python
 # -*- coding: utf-8 -*-

 import argparse
 import boto3


 def fetch_s3_client_object(credentials):
    """
    Build an S3 client that authenticates with temporary credentials.

    Parameters
    ----------
        credentials: mapping
            must provide the keys 'AccessKeyId', 'SecretAccessKey' and
            'SessionToken'; a missing key raises KeyError

    Returns
    ----------
        s3: Object
            the client returned by ``client('s3')`` on a boto3 session
            created from those three values
    """

    # Keyword arguments are evaluated left to right, so the three keys are
    # looked up in the order: access key, secret key, session token.
    temporary_session = boto3.session.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'])
    return temporary_session.client('s3')


 def fetch_objects(bucket, prefix, s3, throwerror):
    """
    Yield the keys in an S3 bucket that start with a given prefix.

    Adapted from: https://alexwlchan.net/2018/01/listing-s3-keys-redux/
    This is a generator: nothing is printed or requested from S3 until the
    result is first iterated.

    Parameters
    ----------
    bucket: string
         takes the bucket name
    prefix: string or tuple of strings
         takes the prefix - either till the folder or till the entire file
         name. A single string is also sent to S3 as the ``Prefix``
         filter; any other value is only applied client-side through
         ``key.startswith(prefix)``.
    s3: object
       s3 client object to be used; only ``list_objects_v2`` is called
    throwerror: boolean or string
             when True, or the string 'true' in any letter case, raise an
             exception if a listing page has no 'Contents' entry (useful
             for validation); any other value ends the listing quietly

    Yields
    ------
    string
       every listed key that starts with ``prefix``

    Raises
    ------
    Exception
       when a page has no 'Contents' entry and ``throwerror`` asks for it
    """

    # Single-argument call form prints the same text on Python 2 and 3.
    print('Fetching s3 files from bucket ' + str(bucket)
          + ' and prefix ' + str(prefix))

    kwargs = {'Bucket': bucket}

    # If the prefix is a single string (not a tuple of strings), we can
    # do the filtering directly in the S3 API.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    # Command-line callers pass the text 'true'; programmatic callers may
    # pass a real boolean. Both must enable validation.
    must_exist = throwerror is True \
        or str(throwerror).strip().lower() == 'true'

    while True:
        # The S3 API response is a large blob of metadata.
        # 'Contents' contains information about the listed objects.
        resp = s3.list_objects_v2(**kwargs)
        try:
            contents = resp['Contents']
        except KeyError:
            if must_exist:
                # %-formatting with a 1-tuple renders str, tuple and None
                # prefixes alike instead of failing with a TypeError that
                # would hide the real problem.
                raise Exception('Could not find data for prefix: %s'
                                % (prefix,))
            return

        for obj in contents:
            key = obj['Key']
            if key.startswith(prefix):
                yield key

        # The S3 API is paginated, returning up to 1000 keys at a time.
        # Pass the continuation token into the next request, until we
        # reach the final page (when this field is missing).
        try:
            kwargs['ContinuationToken'] = resp['NextContinuationToken']
        except KeyError:
            break


 def fetch_s3_credentials_assume_role(account_to_assume, name_of_role):
    """
    Assume an IAM role in another account and return its credentials.

    Parameters
    ----------
    account_to_assume: string
        account id that is placed in the role ARN
    name_of_role: string
        role name that is placed in the role ARN

    Returns
    -------
    The 'Credentials' entry of the STS ``assume_role`` response.
    """

    # Create an STS client; no explicit credentials are passed here.
    sts_client = boto3.client('sts')

    role_arn = 'arn:aws:iam::' + account_to_assume + ':role/' \
        + name_of_role

    # Single-argument call form prints the same text on Python 2 and 3.
    print(role_arn)

    # Call assume_role with the role ARN and a role session name.
    assumed_role_object = sts_client.assume_role(
        RoleArn=role_arn,
        RoleSessionName='AssumeRoleSessio')

    # From the response that contains the assumed role, get the temporary
    # credentials that can be used to make subsequent API calls.
    return assumed_role_object['Credentials']


 def download_s3_files(bucket, prefix, account, role_name, throwerror):
    """
    Download every object found under a prefix.

    Each object is written to a file named after the last '/'-separated
    part of its key, using a relative path. Keys that share a final part
    therefore overwrite one another.

    Parameters
    ----------
    bucket: string
        Bucket information like my-bucket
    prefix: string
        takes the prefix - either till the folder or till the entire file name.
    account: string
        Which IAM account number
    role_name: string
        The role name which is going to be used for connectivity
    throwerror: boolean
        this value is forwarded to fetch_objects and decides whether an
        exception is raised when no data is listed (useful for validation)
    """

    credentials = fetch_s3_credentials_assume_role(account, role_name)
    s3_client = fetch_s3_client_object(credentials)

    for key in fetch_objects(bucket, prefix, s3_client, throwerror):
        file_name = key.split('/')[-1]
        if not file_name:
            # The key ends with '/', so there is no local file name to
            # write to; passing '' to download_file cannot succeed.
            continue
        # Fill the file name into the message instead of printing a
        # literal '{}' placeholder.
        print('Downloading the file: %s' % (file_name,))
        s3_client.download_file(bucket, key, file_name)


 def fetch_args():
    """
    Build the command-line argument parser for this script.

    Returns
    -------
    argparse.ArgumentParser
        parser whose parsed namespace exposes the attributes ``b``
        (bucket), ``p`` (prefix), ``a`` (account), ``r`` (role) and ``e``
        (error flag, None when not given)
    """

    # %(prog)s is expanded by argparse to the name the script was run as,
    # so the example always matches the actual file name.
    parser = argparse.ArgumentParser(
        description='Provide details: example python %(prog)s '
                    '-b "bucket-name" -p "path/to/file/" '
                    '-a "account-name-or-id" -r "role-name"')

    # Each option gets a real long form; dest keeps the one-letter
    # attribute names that existing callers read from the namespace.
    # The first four are required so that a forgotten option is reported
    # as a usage error rather than a TypeError further down.
    parser.add_argument('-b', '--bucket', dest='b', metavar='BUCKET',
                        required=True,
                        help='Provide the bucket name, for example:  bucket-name')
    parser.add_argument('-p', '--prefix', dest='p', metavar='PREFIX',
                        required=True,
                        help='Provide the prefix - till the folder, for example: path/to/file/mysample-file.tsv.gz')
    parser.add_argument('-a', '--account', dest='a', metavar='ACCOUNT',
                        required=True,
                        help='Account name or id')
    parser.add_argument('-r', '--role', dest='r', metavar='ROLE',
                        required=True,
                        help='Role which needs to be used for connectivity')
    parser.add_argument('-e', '--error', dest='e', metavar='ERROR',
                        help='Throw error if there is a problem with file - true/false')

    return parser


 if __name__ == '__main__':
    # Parse the command line and hand the option values, unchanged, to the
    # downloader.
    cli = fetch_args().parse_args()
    download_s3_files(bucket=cli.b,
                      prefix=cli.p,
                      account=cli.a,
                      role_name=cli.r,
                      throwerror=cli.e)
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	import argparse
	import boto3


	def fetch_s3_client_object(credentials):
	    """
	    Build an S3 client that authenticates with temporary credentials.

	    Parameters
	    ----------
	    credentials: mapping
	        must provide the keys 'AccessKeyId', 'SecretAccessKey' and
	        'SessionToken'; a missing key raises KeyError

	    Returns
	    ----------
	    s3: Object
	        the client returned by ``client('s3')`` on a boto3 session
	        created from those three values
	    """

	    # Keyword arguments are evaluated left to right, so the three keys
	    # are looked up in the order: access key, secret key, session token.
	    temporary_session = boto3.session.Session(
	        aws_access_key_id=credentials['AccessKeyId'],
	        aws_secret_access_key=credentials['SecretAccessKey'],
	        aws_session_token=credentials['SessionToken'])
	    return temporary_session.client('s3')


	def fetch_objects(bucket, prefix, s3, throwerror):
	    """
	    Yield the keys in an S3 bucket that start with a given prefix.

	    Adapted from: https://alexwlchan.net/2018/01/listing-s3-keys-redux/
	    This is a generator: nothing is printed or requested from S3 until
	    the result is first iterated.

	    Parameters
	    ----------
	    bucket: string
	        takes the bucket name
	    prefix: string or tuple of strings
	        takes the prefix - either till the folder or till the entire
	        file name. A single string is also sent to S3 as the ``Prefix``
	        filter; any other value is only applied client-side through
	        ``key.startswith(prefix)``.
	    s3: object
	        s3 client object to be used; only ``list_objects_v2`` is called
	    throwerror: boolean or string
	        when True, or the string 'true' in any letter case, raise an
	        exception if a listing page has no 'Contents' entry (useful
	        for validation); any other value ends the listing quietly

	    Yields
	    ------
	    string
	        every listed key that starts with ``prefix``

	    Raises
	    ------
	    Exception
	        when a page has no 'Contents' entry and ``throwerror`` asks
	        for it
	    """

	    # Single-argument call form prints the same text on Python 2 and 3.
	    print('Fetching s3 files from bucket ' + str(bucket)
	          + ' and prefix ' + str(prefix))

	    kwargs = {'Bucket': bucket}

	    # If the prefix is a single string (not a tuple of strings), we can
	    # do the filtering directly in the S3 API.
	    if isinstance(prefix, str):
	        kwargs['Prefix'] = prefix

	    # Command-line callers pass the text 'true'; programmatic callers
	    # may pass a real boolean. Both must enable validation.
	    must_exist = throwerror is True \
	        or str(throwerror).strip().lower() == 'true'

	    while True:
	        # The S3 API response is a large blob of metadata.
	        # 'Contents' contains information about the listed objects.
	        resp = s3.list_objects_v2(**kwargs)
	        try:
	            contents = resp['Contents']
	        except KeyError:
	            if must_exist:
	                # %-formatting with a 1-tuple renders str, tuple and
	                # None prefixes alike instead of failing with a
	                # TypeError that would hide the real problem.
	                raise Exception('Could not find data for prefix: %s'
	                                % (prefix,))
	            return

	        for obj in contents:
	            key = obj['Key']
	            if key.startswith(prefix):
	                yield key

	        # The S3 API is paginated, returning up to 1000 keys at a time.
	        # Pass the continuation token into the next request, until we
	        # reach the final page (when this field is missing).
	        try:
	            kwargs['ContinuationToken'] = resp['NextContinuationToken']
	        except KeyError:
	            break


	def fetch_s3_credentials_assume_role(account_to_assume, name_of_role):
	    """
	    Assume an IAM role in another account and return its credentials.

	    Parameters
	    ----------
	    account_to_assume: string
	        account id that is placed in the role ARN
	    name_of_role: string
	        role name that is placed in the role ARN

	    Returns
	    -------
	    The 'Credentials' entry of the STS ``assume_role`` response.
	    """

	    # Create an STS client; no explicit credentials are passed here.
	    sts_client = boto3.client('sts')

	    role_arn = 'arn:aws:iam::' + account_to_assume + ':role/' \
	        + name_of_role

	    # Single-argument call form prints the same text on Python 2 and 3.
	    print(role_arn)

	    # Call assume_role with the role ARN and a role session name.
	    assumed_role_object = sts_client.assume_role(
	        RoleArn=role_arn,
	        RoleSessionName='AssumeRoleSessio')

	    # From the response that contains the assumed role, get the
	    # temporary credentials that can be used for subsequent API calls.
	    return assumed_role_object['Credentials']


	def download_s3_files(bucket, prefix, account, role_name, throwerror):
	    """
	    Download every object found under a prefix.

	    Each object is written to a file named after the last '/'-separated
	    part of its key, using a relative path. Keys that share a final
	    part therefore overwrite one another.

	    Parameters
	    ----------
	    bucket: string
	        Bucket information like my-bucket
	    prefix: string
	        takes the prefix - either till the folder or till the entire file name.
	    account: string
	        Which IAM account number
	    role_name: string
	        The role name which is going to be used for connectivity
	    throwerror: boolean
	        this value is forwarded to fetch_objects and decides whether an
	        exception is raised when no data is listed (useful for validation)
	    """

	    credentials = fetch_s3_credentials_assume_role(account, role_name)
	    s3_client = fetch_s3_client_object(credentials)

	    for key in fetch_objects(bucket, prefix, s3_client, throwerror):
	        file_name = key.split('/')[-1]
	        if not file_name:
	            # The key ends with '/', so there is no local file name to
	            # write to; passing '' to download_file cannot succeed.
	            continue
	        # Fill the file name into the message instead of printing a
	        # literal '{}' placeholder.
	        print('Downloading the file: %s' % (file_name,))
	        s3_client.download_file(bucket, key, file_name)


	def fetch_args():
	    """
	    Build the command-line argument parser for this script.

	    Returns
	    -------
	    argparse.ArgumentParser
	        parser whose parsed namespace exposes the attributes ``b``
	        (bucket), ``p`` (prefix), ``a`` (account), ``r`` (role) and
	        ``e`` (error flag, None when not given)
	    """

	    # %(prog)s is expanded by argparse to the name the script was run
	    # as, so the example always matches the actual file name.
	    parser = argparse.ArgumentParser(
	        description='Provide details: example python %(prog)s '
	                    '-b "bucket-name" -p "path/to/file/" '
	                    '-a "account-name-or-id" -r "role-name"')

	    # Each option gets a real long form; dest keeps the one-letter
	    # attribute names that existing callers read from the namespace.
	    # The first four are required so that a forgotten option is
	    # reported as a usage error rather than a TypeError further down.
	    parser.add_argument('-b', '--bucket', dest='b', metavar='BUCKET',
	                        required=True,
	                        help='Provide the bucket name, for example: bucket-name')
	    parser.add_argument('-p', '--prefix', dest='p', metavar='PREFIX',
	                        required=True,
	                        help='Provide the prefix - till the folder, for example: path/to/file/mysample-file.tsv.gz')
	    parser.add_argument('-a', '--account', dest='a', metavar='ACCOUNT',
	                        required=True,
	                        help='Account name or id')
	    parser.add_argument('-r', '--role', dest='r', metavar='ROLE',
	                        required=True,
	                        help='Role which needs to be used for connectivity')
	    parser.add_argument('-e', '--error', dest='e', metavar='ERROR',
	                        help='Throw error if there is a problem with file - true/false')

	    return parser


	if __name__ == '__main__':
	    # Parse the command line and hand the option values, unchanged, to
	    # the downloader.
	    cli = fetch_args().parse_args()
	    download_s3_files(bucket=cli.b,
	                      prefix=cli.p,
	                      account=cli.a,
	                      role_name=cli.r,
	                      throwerror=cli.e)