Skip to content

Instantly share code, notes, and snippets.

@Hipnosis183
Last active November 21, 2024 22:52
Show Gist options
  • Save Hipnosis183/a1932b56c63246a45a614e873f1a1465 to your computer and use it in GitHub Desktop.
Save Hipnosis183/a1932b56c63246a45a614e873f1a1465 to your computer and use it in GitHub Desktop.
Amazon S3 Bucket - Files Downloader
# Requires Boto3 package.
# pip install boto3
import boto3
import botocore
import os
import sys
# Initialize a module-level S3 client whose requests are sent unsigned, so no
# AWS credentials have to be configured.
# NOTE(review): presumably this only works for buckets/objects that allow
# anonymous (public) access -- confirm for the target bucket.
s3 = boto3.client('s3', config=boto3.session.Config(signature_version=botocore.UNSIGNED))
# Download all files in a given bucket.
def _local_path_for_key(root, key):
    """Map an object key to an absolute path strictly inside ``root``.

    Returns ``None`` when the key would resolve to ``root`` itself or to a
    location outside of it (e.g. keys containing ``..`` or absolute paths).
    ``root`` must already be an absolute, normalized path.
    """
    target = os.path.abspath(os.path.join(root, key))
    try:
        inside = os.path.commonpath([root, target]) == root
    except ValueError:
        # Raised on Windows when the two paths live on different drives.
        inside = False
    if not inside or target == root:
        return None
    return target


def files_download(bucket):
    """Download every object of ``bucket`` into ``<cwd>/<bucket>/``.

    Objects are listed page by page through the module-level ``s3`` client
    and written to a local path that mirrors their key.

    Args:
        bucket: Name of the S3 bucket to download.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        are caught and reported rather than propagated.
    """
    try:
        files_path = os.path.abspath(os.path.join(os.getcwd(), bucket))
        found_any = False
        # The paginator follows continuation tokens for us.
        paginator = s3.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket):
            for item in page.get('Contents', []):
                found_any = True
                file_key = item['Key']
                print(f'- {file_key}')
                # Keys come from the bucket and are untrusted: refuse any
                # key that would be written outside of the target folder.
                file_path = _local_path_for_key(files_path, file_key)
                if file_path is None:
                    print(f'Skipping {file_key}: it resolves outside of {files_path}.')
                    continue
                try:
                    if file_key.endswith('/'):
                        # "Folder" placeholder object: there is nothing to
                        # download, just mirror the (possibly empty) folder.
                        os.makedirs(file_path, exist_ok=True)
                        continue
                    # Ensure the file path exists (no exists/makedirs race).
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    # Download the file.
                    print(f'Downloading {file_key} to {file_path}...')
                    s3.download_file(bucket, file_key, file_path)
                    print(f'Downloaded {file_key} successfully.')
                except Exception as e:
                    # One bad object must not abort the remaining downloads.
                    print(f'Failed to download {file_key}: {e}')
        # Only report an empty bucket when no page contained any object.
        if not found_any:
            print(f'Bucket {bucket} has no files.')
    except Exception as e:
        # Listing failed (missing bucket, access denied, network error, ...).
        print(f'An error occurred: {e}')
def main():
    """Download all files of the bucket named on the command line.

    The bucket name is taken from the first command-line argument; when no
    argument is given, the built-in default bucket name is used.
    """
    # Define bucket name.
    bucket = sys.argv[1] if len(sys.argv) > 1 else 'mbrown-dq-mdl'
    # Start bucket files download.
    print(f'Downloading files from bucket \'{bucket}\'...')
    files_download(bucket)


# Run script only when executed directly, so importing this module does not
# start a download as a side effect.
if __name__ == '__main__':
    main()
# Requires Boto3 package.
# pip install boto3
import boto3
import botocore
import sys
# Convert size in bytes to a human-readable format.
def size_format(bytes):
    """Render a byte count as a string with two decimals and a unit.

    Values of at least 1024**3 are shown in GB, values of at least 1024**2
    in MB, and everything else in KB.
    """
    # Largest unit first; the first threshold that fits wins.
    units = ((1024 ** 3, 'GB'), (1024 ** 2, 'MB'))
    for threshold, label in units:
        if bytes >= threshold:
            return f'{bytes / threshold:.2f} {label}'
    # Anything below one megabyte falls back to kilobytes.
    return f'{bytes / 1024:.2f} KB'
# Initialize a module-level S3 client whose requests are sent unsigned, so no
# AWS credentials have to be configured.
# NOTE(review): presumably this only works for buckets/objects that allow
# anonymous (public) access -- confirm for the target bucket.
s3 = boto3.client('s3', config=boto3.session.Config(signature_version=botocore.UNSIGNED))
# Get list of files for a given bucket.
def files_list(bucket):
    """Collect the key and formatted size of every object in ``bucket``.

    Returns a tuple ``(files, size)`` where ``files`` is a list of dicts with
    the keys ``'file_key'`` and ``'size'`` (already human-readable), and
    ``size`` is the sum of all object sizes in bytes.
    """
    entries = []
    total_bytes = 0
    # Paginate the listing to overcome the 1000 files limit per response.
    page_iter = s3.get_paginator('list_objects_v2').paginate(Bucket=bucket)
    for page in page_iter:
        # Pages without a 'Contents' entry contribute nothing.
        for obj in page.get('Contents', ()):
            obj_bytes = obj['Size']
            total_bytes += obj_bytes
            entry = {'file_key': obj['Key'], 'size': size_format(obj_bytes)}
            entries.append(entry)
    return entries, total_bytes
# Create index page from the files list.
def html_create(bucket, files, size):
    """Build an HTML index page listing the files of a bucket.

    Args:
        bucket: Bucket name; shown in the title/heading and used as the host
            prefix of every download link.
        files: Iterable-with-length of dicts holding ``'file_key'`` (object
            key) and ``'size'`` (display string), one per table row.
        size: Total size in bytes; formatted with ``size_format`` for the
            summary line.

    Returns:
        The complete HTML document as a string.
    """
    # Local imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    files_total = len(files)
    size_total = size_format(size)
    # Bucket names and object keys are external data: escape them before
    # placing them into markup so they cannot inject HTML.
    bucket_text = escape(str(bucket))
    parts = [f'''
<html>
<head>
<title>Files in Bucket: {bucket_text}</title>
<style>
body {{
font-family: Arial, sans-serif;
}}
table {{
width: 100%;
border-collapse: collapse;
}}
th, td {{
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}}
th {{
background-color: #f2f2f2;
}}
a {{
color: #2a65b1;
}}
</style>
</head>
<body>
<h1>Files in Bucket: {bucket_text}</h1>
<p><strong>Total Files:</strong> {files_total} | <strong>Total Size:</strong> {size_total}</p>
<table>
<thead>
<tr>
<th>Name</th>
<th>Size</th>
<th>Link</th>
</tr>
</thead>
<tbody>
''']
    for file in files:
        key = file['file_key']
        # Percent-encode the key for the URL ('/' is kept as the path
        # separator), then escape the whole URL for the attribute value.
        href = escape(f'https://{bucket}.s3.amazonaws.com/{quote(key)}', quote=True)
        parts.append(f'''
<tr>
<td>{escape(key)}</td>
<td>{escape(str(file['size']))}</td>
<td><a href="{href}" target="_blank">Download</a></td>
</tr>
''')
    parts.append('''
</tbody>
</table>
</body>
</html>
''')
    # Join once instead of growing a string with += for every row.
    return ''.join(parts)
# Save index page to a file.
def html_save(html, filename):
    """Write the HTML text to ``filename``, replacing any existing file.

    The file is always encoded as UTF-8 so that object keys containing
    non-ASCII characters are written identically on every platform instead
    of depending on (or failing under) the locale's default encoding.

    Args:
        html: The document text to write.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(html)
def main():
    """Generate ``<bucket>.html`` indexing the bucket named on the command line.

    The bucket name is taken from the first command-line argument; when no
    argument is given, the built-in default bucket name is used.
    """
    # Define bucket name.
    bucket = sys.argv[1] if len(sys.argv) > 1 else 'mbrown-dq-mdl'
    # List all files in the bucket.
    print(f'Fetching files from bucket \'{bucket}\'...')
    files, size = files_list(bucket)
    # Generate the HTML content.
    print('Generating HTML file...')
    html = html_create(bucket, files, size)
    # Save the HTML content to a file.
    html_save(html, f'{bucket}.html')
    print(f'Saved HTML file as \'{bucket}.html\'.')


# Run script only when executed directly, so importing this module does not
# trigger a bucket listing as a side effect.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment