Last active
November 21, 2024 22:52
-
-
Save Hipnosis183/a1932b56c63246a45a614e873f1a1465 to your computer and use it in GitHub Desktop.
Amazon S3 Bucket - Files Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires Boto3 package. | |
# pip install boto3 | |
import boto3 | |
import botocore | |
import os | |
import sys | |
# Initialize a module-level S3 client without credentials: request signing is
# disabled (signature_version=botocore.UNSIGNED), so requests are anonymous.
# files_download() below uses this client through the global name `s3`.
s3 = boto3.client('s3', config=boto3.session.Config(signature_version=botocore.UNSIGNED))
# Download all files in a given bucket. | |
def files_download(bucket):
    """Download every object of an S3 bucket into ``<cwd>/<bucket>/``.

    Objects are listed page by page with ``list_objects_v2`` (following the
    continuation token while the response is truncated) and each key is
    saved below a directory named after the bucket in the current working
    directory, recreating the key's ``/``-separated prefixes as directories.

    Keys ending in ``/`` (the placeholder objects used to represent empty
    "folders") are created as directories instead of being downloaded, and
    keys that would resolve outside the destination directory (absolute
    paths or ``..`` segments) are skipped with a message.

    Args:
        bucket: Name of the S3 bucket to download.

    Returns:
        None. Progress is printed to stdout. Any exception raised while
        listing or downloading is caught, printed, and ends the run.
    """
    try:
        files_path = f'{os.getcwd()}/{bucket}'
        # Canonical form of the destination, used to detect escaping keys.
        files_root = os.path.realpath(files_path)
        # Bucket cursor pagination loop.
        token = None
        while True:
            # Get list of objects in the bucket.
            params = {'Bucket': bucket}
            if token:
                params['ContinuationToken'] = token
            response = s3.list_objects_v2(**params)
            # Check if the bucket is empty.
            if 'Contents' not in response:
                print(f'Bucket {bucket} has no files.')
                return
            # Download all files in the current bucket list.
            for item in response['Contents']:
                file_key = item['Key']
                file_path = os.path.join(files_path, file_key)
                print(f'- {file_key}')
                # os.path.join() discards files_path when the key is absolute,
                # and '..' segments can climb out of it; never write there.
                resolved = os.path.realpath(file_path)
                if not resolved.startswith(files_root + os.sep):
                    print(f'Skipping {file_key}: path is outside {files_path}.')
                    continue
                # A key ending in '/' is a folder placeholder, not a file:
                # downloading it would target a directory path and fail.
                if file_key.endswith('/'):
                    os.makedirs(file_path, exist_ok=True)
                    continue
                # Ensure the file path exists.
                os.makedirs(os.path.dirname(file_path), exist_ok=True)
                # Download the file.
                print(f'Downloading {file_key} to {file_path}...')
                s3.download_file(bucket, file_key, file_path)
                print(f'Downloaded {file_key} successfully.')
            # Check if there are more files.
            if response.get('IsTruncated'):
                token = response.get('NextContinuationToken')
            else:
                break
    except Exception as e:
        # Top-level boundary of the script: report the failure and stop.
        print(f'An error occurred: {e}')
def main():
    """Script entry point: pick the bucket name and download its files.

    The bucket is taken from the first command-line argument when one is
    given; otherwise the built-in default bucket name is used.
    """
    # Everything after the script name; empty when no argument was passed.
    cli_args = sys.argv[1:]
    if cli_args:
        bucket = cli_args[0]
    else:
        bucket = 'mbrown-dq-mdl'
    # Announce the target bucket, then start the download.
    print(f"Downloading files from bucket '{bucket}'...")
    files_download(bucket)
# Run script.
# NOTE: this call is unconditional, so main() also runs when the file is imported.
main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires Boto3 package. | |
# pip install boto3 | |
import boto3 | |
import botocore | |
import sys | |
# Convert size in bytes to a human-readable format. | |
def size_format(bytes):
    """Return *bytes* formatted as a string in KB, MB or GB.

    The largest unit whose size does not exceed the value is used, with two
    decimal places. Values below one megabyte, including those below one
    kilobyte, are expressed in KB.

    Args:
        bytes: Size in bytes. The name shadows the builtin but is kept
            because it is part of the function's signature.

    Returns:
        A string such as ``'1.50 KB'``, ``'2.00 MB'`` or ``'3.25 GB'``.
    """
    # Units from largest to smallest; the first one that fits wins.
    units = (
        (1024 ** 3, 'GB'),
        (1024 ** 2, 'MB'),
    )
    for factor, suffix in units:
        if bytes >= factor:
            return f'{bytes / factor:.2f} {suffix}'
    # Anything smaller than a megabyte is reported in kilobytes.
    return f'{bytes / 1024:.2f} KB'
# Initialize a module-level S3 client without credentials: request signing is
# disabled (signature_version=botocore.UNSIGNED), so requests are anonymous.
# files_list() below uses this client through the global name `s3`.
s3 = boto3.client('s3', config=boto3.session.Config(signature_version=botocore.UNSIGNED))
# Get list of files for a given bucket. | |
def files_list(bucket):
    """Collect the key and size of every object in a bucket.

    Args:
        bucket: Name of the S3 bucket to list.

    Returns:
        A ``(files, size)`` tuple. ``files`` is a list of dicts with the
        keys ``'file_key'`` (the object key) and ``'size'`` (the object
        size formatted by ``size_format``); ``size`` is the sum of all
        object sizes in bytes.
    """
    entries = []
    total_bytes = 0
    # Use a paginator so that listings split across several pages
    # (the 1000 files limit) are read in full.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket):
        # Pages without a 'Contents' entry contribute nothing.
        for obj in page.get('Contents', ()):
            obj_bytes = obj['Size']
            total_bytes += obj_bytes
            entries.append({
                'file_key': obj['Key'],
                'size': size_format(obj_bytes),
            })
    return entries, total_bytes
# Create index page from the files list. | |
def html_create(bucket, files, size):
    """Build an HTML index page for the files of a bucket.

    The page shows the bucket name, the number of files, the total size and
    a table with one row per file (name, size, download link).

    The bucket name and object keys are treated as untrusted text: they are
    HTML-escaped before being placed in the page, and each key is
    percent-encoded in its download URL so that spaces, ``#``, ``?``, ``+``
    and similar characters still produce a working link.

    Args:
        bucket: Name of the S3 bucket; also used as the host prefix of the
            ``https://<bucket>.s3.amazonaws.com/<key>`` download links.
        files: Iterable-with-length of dicts holding ``'file_key'`` and
            ``'size'`` (an already formatted size string).
        size: Total size of all files in bytes.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally under explicit names: `html` is also used as a
    # variable name elsewhere in this file.
    from html import escape
    from urllib.parse import quote

    files_total = len(files)
    size_total = size_format(size)
    bucket_text = escape(bucket)

    head = f'''
<html>
<head>
  <title>Files in Bucket: {bucket_text}</title>
  <style>
    body {{
      font-family: Arial, sans-serif;
    }}
    table {{
      width: 100%;
      border-collapse: collapse;
    }}
    th, td {{
      border: 1px solid #ddd;
      padding: 8px;
      text-align: left;
    }}
    th {{
      background-color: #f2f2f2;
    }}
    a {{
      color: #2a65b1;
    }}
  </style>
</head>
<body>
  <h1>Files in Bucket: {bucket_text}</h1>
  <p><strong>Total Files:</strong> {files_total} | <strong>Total Size:</strong> {size_total}</p>
  <table>
    <thead>
      <tr>
        <th>Name</th>
        <th>Size</th>
        <th>Link</th>
      </tr>
    </thead>
    <tbody>
'''

    rows = []
    for file in files:
        key = file['file_key']
        # Percent-encode the key for the URL path ('/' is kept as a path
        # separator), then escape the whole URL for the attribute value.
        url = escape(f'https://{bucket}.s3.amazonaws.com/{quote(key)}')
        rows.append(f'''
      <tr>
        <td>{escape(key)}</td>
        <td>{escape(str(file['size']))}</td>
        <td><a href="{url}" target="_blank">Download</a></td>
      </tr>
''')

    tail = '''
    </tbody>
  </table>
</body>
</html>
'''
    # Join once instead of growing the string row by row.
    return head + ''.join(rows) + tail
# Save index page to a file. | |
def html_save(html, filename):
    """Write the HTML text to *filename*, replacing any existing file.

    The file is always written as UTF-8 rather than in the platform's
    locale encoding, so object keys containing non-ASCII characters cannot
    make the write fail with ``UnicodeEncodeError``.

    Args:
        html: The HTML document text to save.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        out.write(html)
def main():
    """Script entry point: list a bucket and save its HTML index page.

    The bucket is taken from the first command-line argument when one is
    given; otherwise the built-in default bucket name is used. The page is
    written to ``<bucket>.html``.
    """
    # Start from the default bucket and override it from the command line.
    bucket = 'mbrown-dq-mdl'
    if len(sys.argv) > 1:
        bucket = sys.argv[1]
    # Gather every file of the bucket together with the total size.
    print(f"Fetching files from bucket '{bucket}'...")
    files, size = files_list(bucket)
    # Render the listing as an HTML page.
    print('Generating HTML file...')
    page = html_create(bucket, files, size)
    # Write the page next to the script's working directory.
    output_name = f'{bucket}.html'
    html_save(page, output_name)
    print(f"Saved HTML file as '{output_name}'.")
# Run script.
# NOTE: this call is unconditional, so main() also runs when the file is imported.
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment