Rename S3 files in bulk using boto3
# matching functions from https://alexwlchan.net/2017/07/listing-s3-keys/
# https://alexwlchan.net/2018/01/listing-s3-keys-redux/
import os

import boto3


def get_matching_s3_objects(bucket, prefix='', suffix=''):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client('s3')
    kwargs = {'Bucket': bucket}

    # If the prefix is a single string (not a tuple of strings), we can
    # do the filtering directly in the S3 API.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    while True:
        # The S3 API response is a large blob of metadata.
        # 'Contents' contains information about the listed objects.
        resp = s3.list_objects_v2(**kwargs)

        try:
            contents = resp['Contents']
        except KeyError:
            return

        for obj in contents:
            key = obj['Key']
            if key.startswith(prefix) and key.endswith(suffix):
                yield obj

        # The S3 API is paginated, returning up to 1000 keys at a time.
        # Pass the continuation token into the next request, until we
        # reach the final page (when this field is missing).
        try:
            kwargs['ContinuationToken'] = resp['NextContinuationToken']
        except KeyError:
            break
def get_matching_s3_keys(bucket, prefix='', suffix=''):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj['Key']
s3_client = boto3.client('s3')

bucket_name = 'my_s3_bucket'
key_prefix = 'blahdir/other_dir/sometext'

# Only get the keys matching s3://my_s3_bucket/blahdir/other_dir/sometext*.txt
files = get_matching_s3_keys(bucket=bucket_name,
                             prefix=key_prefix,
                             suffix='.txt')

for each in files:
    # Copy each .txt object to a .json key with the same path and base name.
    new_file = os.path.splitext(each)[0] + '.json'
    print("Copying: ", new_file)
    copy_source = {'Bucket': bucket_name, 'Key': each}
    s3_client.copy_object(CopySource=copy_source, Bucket=bucket_name, Key=new_file)
    # s3_client.delete_object(Bucket=bucket_name, Key=each)  # uncomment to rename (copy then delete) rather than just copy
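If you only need the keys, boto3's built-in paginator for list_objects_v2 can stand in for the manual ContinuationToken loop above. A minimal sketch under the same bucket/prefix placeholders; list_matching_keys is an illustrative helper name, not part of the original gist:

import boto3

def list_matching_keys(bucket, prefix='', suffix=''):
    # The paginator handles ContinuationToken bookkeeping across pages.
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # Pages with no matching objects omit the 'Contents' key entirely.
        for obj in page.get('Contents', []):
            if obj['Key'].endswith(suffix):
                yield obj['Key']

# e.g. keys = list_matching_keys('my_s3_bucket', 'blahdir/other_dir/sometext', '.txt')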
# Variation posted in the gist comments: an AWS Lambda handler that renames
# matching objects by adding today's date to the filename prefix.
import os
from datetime import datetime

import boto3

prefix = 'abc_'
new_prefix = f"{prefix}{datetime.today().strftime('%Y-%m-%d')}"
suffix = 'csv.gz'
bucket_name = 'mybucketname'


def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.all():
        key = obj.key
        path_part = os.path.dirname(key)
        filename = os.path.basename(key)
        copy_source = {
            'Bucket': bucket_name,
            'Key': key
        }
        if filename.startswith(prefix) and filename.endswith(suffix):
            # Swap the bare prefix for the dated one, keeping the rest of the
            # filename, e.g. 'abc_report.csv.gz' -> 'abc_YYYY-MM-DDreport.csv.gz'.
            new_key = f"{new_prefix}{filename[len(prefix):]}"
            full_key_with_path = os.path.join(path_part, new_key)
            destination_bucket = s3.Bucket(bucket_name)
            print(f'copying the object with new key: {full_key_with_path}')
            destination_bucket.copy(copy_source, full_key_with_path)
            print(f'deleting old key: {key}')
            s3.Object(bucket_name, key).delete()
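One note on the loop above: bucket.objects.all() walks every key in the bucket. When the files of interest live under a known key prefix (i.e. the prefix starts the full key, not just the basename), the listing can be filtered server-side instead. A small sketch under that assumption; matching_objects is an illustrative helper name:

import boto3

def matching_objects(bucket_name, key_prefix):
    # filter(Prefix=...) maps to the ListObjectsV2 Prefix parameter, so S3 only
    # returns keys that start with key_prefix.
    s3 = boto3.resource('s3')
    return s3.Bucket(bucket_name).objects.filter(Prefix=key_prefix)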