Last active
January 28, 2019 19:09
-
-
Save dtenenba/9d6e60446dc29c8b8c58e932b67194c4 to your computer and use it in GitHub Desktop.
Permanently delete objects in a versioned S3 bucket
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Permanently delete all "deleted" objects in a versioned S3 bucket.

In a versioned bucket, deleting an object does not actually remove it
(though bucket lifecycle policies may eventually do so). Previous versions
remain in the bucket, and each deletion is recorded as a "delete marker".

Use this script if you really, really want to permanently delete all
versions of "deleted" items in a bucket.

NOTE: Once you have permanently deleted an object with this script, there
is no getting it back. It's gone!
"""
import boto3

bucket = "fh-pi-meshinchi-s"  # change this to match your bucket

s3 = boto3.client("s3")
args = dict(Bucket=bucket)
# you could set args['Prefix'] here if you only want to operate on a certain prefix

# Maps each key to every object version and every delete marker listed for it:
#   {key: {"objects": [...], "delete_markers": [...]}}
# NOTE: the whole listing is held in memory before anything is deleted.
results = {}

# Let boto3 drive the pagination: the paginator feeds the continuation
# markers from each page back into the next request until the listing is
# no longer truncated.
paginator = s3.get_paginator("list_object_versions")
for page in paginator.paginate(**args):
    for response_field, slot in (
        ("Versions", "objects"),
        ("DeleteMarkers", "delete_markers"),
    ):
        # Either field may be missing from a page, hence the default.
        for item in page.get(response_field, []):
            entry = results.setdefault(
                item["Key"], dict(objects=[], delete_markers=[])
            )
            entry[slot].append(item)

# filter out all items that don't have delete markers:
results = {key: value for key, value in results.items() if value["delete_markers"]}
# Determine whether the most recent state of the object is a delete marker.
def is_deleted(key, index=None):
    """Return True if the latest version of *key* is a delete marker.

    Args:
        key: Object key to look up.
        index: Optional mapping of key -> {"objects": [...],
            "delete_markers": [...]}. Defaults to the module-level
            ``results`` listing, which preserves the original
            single-argument call.

    Returns:
        True when any delete marker recorded for *key* has a truthy
        ``"IsLatest"`` value, otherwise False.

    Raises:
        KeyError: If *key* is not present in the mapping.
    """
    if index is None:
        index = results
    return any(marker["IsLatest"] for marker in index[key]["delete_markers"])
# Perform the permanent deletion, one request per version.
for key, versions in results.items():
    # Leave alone any key whose latest version is not a delete marker.
    if not is_deleted(key):
        continue
    print("Deleting {}".format(key))
    # Object versions are removed first, then the delete markers themselves.
    for entry in versions["objects"] + versions["delete_markers"]:
        s3.delete_object(Bucket=bucket, Key=key, VersionId=entry["VersionId"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment