Skip to content

Instantly share code, notes, and snippets.

@houey
Created April 4, 2026 04:15
Show Gist options
  • Select an option

  • Save houey/1c8d7a0b099a84e9269b9575797e1d25 to your computer and use it in GitHub Desktop.

Select an option

Save houey/1c8d7a0b099a84e9269b9575797e1d25 to your computer and use it in GitHub Desktop.
add delete incomplete multipart uploads and delete expired object deletion markers
#!/usr/bin/env bash
# s3_lifecycle_manager.sh
#
# @decision DEC-LIFECYCLE-001
# @title S3 Lifecycle Cost-Control Rule Manager
# @status accepted
# @rationale Single-script approach using aws CLI + jq avoids Python/boto3 dependency.
# Reads existing lifecycle config before writing to preserve all user rules.
# Uses jq for all JSON construction to prevent shell injection and malformed JSON.
# Error handling is per-bucket (log + continue) so one AccessDenied doesn't abort
# processing for all other accessible buckets.
#
# PURPOSE: Ensures every S3 bucket has a "cost-control" lifecycle rule covering:
# 1. AbortIncompleteMultipartUpload (7 days) — prevents accumulating orphaned parts
# 2. Expiration.ExpiredObjectDeleteMarker: true — removes stale delete markers
#
# USAGE:
# s3_lifecycle_manager.sh --mode MODE [--help|-h]
# Modes: 1=dry-run, 2=apply both rules, 3=apply only incomplete multipart uploads
#
# DEPENDENCIES: aws CLI (configured via env vars), jq
# EXIT CODES: 0 = all buckets succeeded, 1 = one or more errors occurred
set -euo pipefail
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# ID of the lifecycle rule this script owns; read by usage, build_rule and merge_rules.
readonly RULE_ID="cost-control"
# Value written to AbortIncompleteMultipartUpload.DaysAfterInitiation by build_rule.
readonly ABORT_DAYS=7
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
  # Print the help text to stdout, one printf argument per output line.
  # Reads the globals RULE_ID and ABORT_DAYS; callers redirect to stderr
  # themselves when the help accompanies an error.
  printf '%s\n' \
    "Usage: $(basename "$0") --mode MODE [OPTIONS]" \
    "Manages the \"${RULE_ID}\" lifecycle rule across every S3 bucket in the account." \
    'OPTIONS:' \
    '-m, --mode MODE Required. Select operating mode:' \
    '1 Dry run — preview changes without applying' \
    "2 Add both: incomplete multipart uploads (${ABORT_DAYS}d) + expired delete markers" \
    "3 Add only: incomplete multipart uploads (${ABORT_DAYS}d)" \
    '-h, --help Show this help message' \
    'CREDENTIALS:' \
    'Uses AWS credentials from environment variables (AWS_ACCESS_KEY_ID,' \
    'AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, or an active IAM role).' \
    'OUTPUT:' \
    '[OK] bucket (region): Added cost-control rule (...)' \
    '[--] bucket (region): Already covered' \
    '[!!] bucket (region): Error - reason' \
    'EXIT CODES:' \
    '0 All buckets processed successfully' \
    '1 One or more buckets encountered errors'
}
# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------
# Walk the positional parameters once. Only --mode/-m (with a value) and
# --help/-h are accepted; anything else prints the usage text and exits 1.
MODE=""
while (( $# > 0 )); do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    -m|--mode)
      if [[ -n "${2:-}" ]]; then
        MODE="$2"
        shift 2
      else
        echo "ERROR: --mode requires an argument" >&2
        usage >&2
        exit 1
      fi
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
done

# Validate the collected mode: it must be present and one of 1, 2 or 3.
case "$MODE" in
  1|2|3)
    ;;
  "")
    echo "ERROR: --mode is required" >&2
    usage >&2
    exit 1
    ;;
  *)
    echo "ERROR: Invalid mode '${MODE}'. Must be 1, 2, or 3." >&2
    usage >&2
    exit 1
    ;;
esac
# ---------------------------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------------------------
# Stop immediately if either external tool the script shells out to is missing.
for cmd in aws jq; do
  command -v "$cmd" &>/dev/null && continue
  echo "ERROR: Required command not found: $cmd" >&2
  exit 1
done
# ---------------------------------------------------------------------------
# Credential validation
# ---------------------------------------------------------------------------
echo "Validating AWS credentials..."
# The caller identity must resolve; on failure the captured CLI output
# (stdout and stderr combined) is shown to the user before exiting.
IDENTITY_JSON=$(aws sts get-caller-identity --output json 2>&1) || {
  echo "ERROR: AWS credential validation failed:" >&2
  echo "$IDENTITY_JSON" >&2
  exit 1
}
ACCOUNT_ID=$(jq -r '.Account' <<<"$IDENTITY_JSON")
IDENTITY_ARN=$(jq -r '.Arn' <<<"$IDENTITY_JSON")

# Account alias is optional: keep "N/A" when the IAM call fails or the
# account has no alias configured.
ACCOUNT_ALIAS="N/A"
if ALIAS_JSON=$(aws iam list-account-aliases --output json 2>/dev/null); then
  _alias=$(jq -r '.AccountAliases[0] // empty' <<<"$ALIAS_JSON")
  if [[ -n "$_alias" ]]; then
    ACCOUNT_ALIAS="$_alias"
  fi
fi

# Captured once here so the summary report shows when the run started.
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

echo "Account: ${ACCOUNT_ID} (${ACCOUNT_ALIAS})"
echo "Identity: ${IDENTITY_ARN}"
if [[ "$MODE" == "1" ]]; then
  echo "Mode: 1 — DRY RUN (no changes will be applied)"
elif [[ "$MODE" == "2" ]]; then
  echo "Mode: 2 — Apply both: incomplete multipart uploads (${ABORT_DAYS}d) + expired delete markers"
elif [[ "$MODE" == "3" ]]; then
  echo "Mode: 3 — Apply only: incomplete multipart uploads (${ABORT_DAYS}d)"
fi
echo ""
# ---------------------------------------------------------------------------
# List buckets
# ---------------------------------------------------------------------------
# Fetch the bucket listing. A failed call is fatal: the captured CLI output
# (stdout and stderr combined) is printed and the script exits 1.
if ! BUCKETS_JSON=$(aws s3api list-buckets --output json 2>&1); then
  echo "ERROR: Failed to list S3 buckets:" >&2
  echo "$BUCKETS_JSON" >&2
  exit 1
fi
# `.Buckets // []` tolerates a response whose Buckets key is null or absent;
# a bare `.Buckets[]` makes jq fail on null, which under `set -e` would abort
# the script before the "no buckets" message below could be printed.
BUCKET_NAMES=$(jq -r '(.Buckets // [])[].Name' <<<"$BUCKETS_JSON")
if [[ -z "$BUCKET_NAMES" ]]; then
  echo "No S3 buckets found in this account."
  exit 0
fi
# ---------------------------------------------------------------------------
# Per-bucket tracking arrays
# ---------------------------------------------------------------------------
# Parallel arrays: index i holds the bucket name, region and action text for
# the i-th processed bucket. They are initialised to empty arrays explicitly
# so they count as set (not merely declared) under `set -u`.
declare -a RESULTS_BUCKET=()
declare -a RESULTS_REGION=()
declare -a RESULTS_ACTION=()
# Outcome counters incremented by the main loop and printed in the summary.
count_added=0
count_covered=0
count_error=0
# ---------------------------------------------------------------------------
# Helper: resolve bucket region
# ---------------------------------------------------------------------------
resolve_region() {
  # Print the region name for a bucket.
  # Arguments: $1 - bucket name
  # Outputs:   the region on stdout; an empty line when it cannot be determined
  # Returns:   0 on success, 1 when the AWS call fails or its output is not
  #            parseable JSON
  local bucket="$1"
  local loc raw
  # @decision DEC-LIFECYCLE-002
  # get-bucket-location returns null JSON for us-east-1 (legacy default region),
  # the string "None" in some SDK versions, and "EU" as an alias for eu-west-1.
  # We normalize all three here.
  #
  # stderr is discarded rather than merged into stdout: the error text is never
  # used here, and any warning mixed into the JSON would make jq fail.
  if ! loc=$(aws s3api get-bucket-location \
    --bucket "$bucket" \
    --output json 2>/dev/null); then
    echo "" # signal error to caller via empty string
    return 1
  fi
  # A parse failure must be reported as an error; falling through with an
  # empty value would silently map the bucket to us-east-1 below.
  if ! raw=$(jq -r '.LocationConstraint // "us-east-1"' <<<"$loc" 2>/dev/null); then
    echo ""
    return 1
  fi
  case "$raw" in
    "null"|"None"|"") echo "us-east-1" ;;
    "EU") echo "eu-west-1" ;;
    *) echo "$raw" ;;
  esac
}
# ---------------------------------------------------------------------------
# Helper: get existing lifecycle rules (empty array on NoSuchLifecycleConfiguration)
# ---------------------------------------------------------------------------
get_existing_rules() {
  # Print the bucket's current lifecycle rules as a JSON array.
  # Arguments: $1 - bucket name, $2 - bucket region
  # Outputs:   the Rules array ([] when the bucket has no lifecycle config);
  #            an empty line on any other AWS error
  # Returns:   jq's status on the success path, 0 for the "no config" case,
  #            1 for any other AWS error
  local bucket="$1"
  local region="$2"
  local aws_out

  if aws_out=$(aws s3api get-bucket-lifecycle-configuration \
    --bucket "$bucket" \
    --region "$region" \
    --output json 2>&1); then
    # Success: hand back the Rules array, or [] when the key is missing.
    jq '.Rules // []' <<<"$aws_out"
    return
  fi

  # The call failed. A missing lifecycle configuration is the normal
  # "nothing configured yet" situation, not an error.
  if grep -q "NoSuchLifecycleConfiguration" <<<"$aws_out"; then
    echo "[]"
    return 0
  fi

  # Everything else (AccessDenied, throttling, ...) is reported to the caller.
  echo ""
  return 1
}
# ---------------------------------------------------------------------------
# Helper: check coverage of existing rules
# ---------------------------------------------------------------------------
# Outputs two lines: "abort_covered=true|false" and "marker_covered=true|false"
# Arguments: $1 - JSON array of lifecycle rules
# Returns:   0 on success, 1 when jq cannot evaluate the rules
check_coverage() {
  local rules_json="$1"
  local abort_covered marker_covered
  # @decision DEC-LIFECYCLE-003
  # We check ALL rules (not just "cost-control") because another tool may have
  # already configured AbortIncompleteMultipartUpload in a different rule.
  # The goal is idempotent cost coverage, not ownership of a specific rule name.
  #
  # Only rules with Status "Enabled" count: a rule that is present but disabled
  # must not be reported as coverage.
  # NOTE(review): a rule's Filter/Prefix scope is not inspected, so a
  # prefix-scoped rule still counts as covering the bucket — confirm this is
  # the intended policy.
  abort_covered=$(jq '
    any(.[]; .Status == "Enabled"
      and .AbortIncompleteMultipartUpload.DaysAfterInitiation != null)
  ' <<<"$rules_json") || return 1
  marker_covered=$(jq '
    any(.[]; .Status == "Enabled"
      and .Expiration.ExpiredObjectDeleteMarker == true)
  ' <<<"$rules_json") || return 1
  echo "abort_covered=${abort_covered}"
  echo "marker_covered=${marker_covered}"
}
# ---------------------------------------------------------------------------
# Helper: build the new cost-control rule (only missing fields included)
# ---------------------------------------------------------------------------
build_rule() {
  # Print the JSON for the cost-control rule, containing only the requested
  # actions.
  # Arguments: $1 - "true" to include AbortIncompleteMultipartUpload
  #            $2 - "true" to include Expiration.ExpiredObjectDeleteMarker
  # Globals:   RULE_ID (read), ABORT_DAYS (read)
  local need_abort="$1"
  local need_marker="$2"
  # @decision DEC-LIFECYCLE-004
  # The document is assembled entirely inside jq; shell values enter only via
  # --arg / --argjson, so they arrive as properly typed and escaped JSON
  # (string, number, booleans) rather than through string interpolation.
  jq -n \
    --arg id "$RULE_ID" \
    --argjson abort_days "$ABORT_DAYS" \
    --argjson need_abort "$need_abort" \
    --argjson need_marker "$need_marker" \
    '
      {ID: $id, Status: "Enabled", Filter: {Prefix: ""}}
      | if $need_abort
          then .AbortIncompleteMultipartUpload = {DaysAfterInitiation: $abort_days}
          else .
        end
      | if $need_marker
          then .Expiration = {ExpiredObjectDeleteMarker: true}
          else .
        end
    '
}
# ---------------------------------------------------------------------------
# Helper: merge new rule into existing list, replacing any prior cost-control rule
# ---------------------------------------------------------------------------
merge_rules() {
  # Print the rule list to upload: every existing rule except the prior
  # cost-control rule, followed by the updated cost-control rule.
  # Arguments: $1 - JSON array of existing rules
  #            $2 - JSON object for the new cost-control rule
  # Globals:   RULE_ID (read)
  # Returns:   jq's exit status
  local existing_rules="$1"
  local new_rule="$2"
  # The new rule carries only the actions that were found missing, so the prior
  # cost-control rule cannot simply be discarded: that would drop an action it
  # already provides (e.g. a mode 3 run followed by a mode 2 run would lose
  # AbortIncompleteMultipartUpload). Instead the prior rule's fields are kept
  # and overlaid with the new rule's fields; keys present in both take the new
  # value. With no prior rule, $prior is {} and the result is the new rule.
  jq \
    --arg id "$RULE_ID" \
    --argjson new_rule "$new_rule" \
    '
      ([ .[] | select(.ID == $id) ] | add // {}) as $prior
      | [ .[] | select(.ID != $id) ] + [ $prior + $new_rule ]
    ' <<<"$existing_rules"
}
# ---------------------------------------------------------------------------
# Main processing loop
# ---------------------------------------------------------------------------
# Append one row (bucket, region, action text) to the summary table arrays.
_record_result() {
  RESULTS_BUCKET+=("$1")
  RESULTS_REGION+=("$2")
  RESULTS_ACTION+=("$3")
}
# Matches the first parenthesised alphabetic token in AWS CLI error text,
# e.g. "(AccessDenied)"; capture group 1 is the bare code.
err_code_re='\(([A-Za-z]+)\)'

echo "Processing buckets..."
echo ""
# One iteration per bucket name. Every failure is logged, recorded and counted
# for that bucket only, and the loop moves on to the next one.
while IFS= read -r bucket; do
  # -- Resolve region -------------------------------------------------------
  region=""
  if ! region=$(resolve_region "$bucket"); then
    echo "[!!] ${bucket}: Error - could not determine bucket region"
    _record_result "$bucket" "unknown" "Error: could not determine region"
    count_error=$(( count_error + 1 ))
    continue
  fi

  # -- Get existing rules ---------------------------------------------------
  existing_rules=""
  if ! existing_rules=$(get_existing_rules "$bucket" "$region"); then
    echo "[!!] ${bucket} (${region}): Error - could not retrieve lifecycle configuration"
    _record_result "$bucket" "$region" "Error: could not retrieve lifecycle config"
    count_error=$(( count_error + 1 ))
    continue
  fi

  # -- Check coverage -------------------------------------------------------
  coverage=""
  if ! coverage=$(check_coverage "$existing_rules"); then
    echo "[!!] ${bucket} (${region}): Error - could not evaluate existing lifecycle rules"
    _record_result "$bucket" "$region" "Error: could not evaluate existing rules"
    count_error=$(( count_error + 1 ))
    continue
  fi
  # Parse the "key=value" lines in-shell; anything other than the literal
  # "true" is treated as not covered.
  abort_covered="false"
  marker_covered="false"
  while IFS='=' read -r cov_key cov_value; do
    case "$cov_key" in
      abort_covered) abort_covered="$cov_value" ;;
      marker_covered) marker_covered="$cov_value" ;;
    esac
  done <<<"$coverage"

  # -- Determine what we're adding ------------------------------------------
  # Mode 3 only cares about abort coverage and never adds the delete-marker
  # action; modes 1 and 2 require both.
  need_abort="true"
  if [[ "$abort_covered" == "true" ]]; then
    need_abort="false"
  fi
  need_marker="false"
  if [[ "$MODE" != "3" && "$marker_covered" != "true" ]]; then
    need_marker="true"
  fi

  # Nothing missing for this mode: the bucket is already covered.
  if [[ "$need_abort" == "false" && "$need_marker" == "false" ]]; then
    echo "[--] ${bucket} (${region}): Already covered"
    _record_result "$bucket" "$region" "Already covered"
    count_covered=$(( count_covered + 1 ))
    continue
  fi

  # Build human-readable description of what's being added
  added_parts=()
  if [[ "$need_abort" == "true" ]]; then
    added_parts+=("incomplete multipart uploads (${ABORT_DAYS}d)")
  fi
  if [[ "$need_marker" == "true" ]]; then
    added_parts+=("expired delete markers")
  fi
  # "${arr[*]}" joins with only the FIRST character of IFS, so a ", " separator
  # has to be produced explicitly: append it after every part, then trim the
  # trailing one.
  printf -v added_desc '%s, ' "${added_parts[@]}"
  added_desc="${added_desc%, }"

  # -- Build merged lifecycle config ----------------------------------------
  # A failure in any step is confined to this bucket; unguarded, `set -e`
  # would terminate the whole run.
  if ! {
    new_rule=$(build_rule "$need_abort" "$need_marker") \
      && merged_rules=$(merge_rules "$existing_rules" "$new_rule") \
      && full_config=$(jq -n --argjson rules "$merged_rules" '{"Rules": $rules}')
  }; then
    echo "[!!] ${bucket} (${region}): Error - could not build lifecycle configuration"
    _record_result "$bucket" "$region" "Error: could not build lifecycle config"
    count_error=$(( count_error + 1 ))
    continue
  fi

  # -- Apply or dry-run -----------------------------------------------------
  if [[ "$MODE" == "1" ]]; then
    echo "[OK] ${bucket} (${region}): [DRY RUN] Would add cost-control rule (${added_desc})"
    _record_result "$bucket" "$region" "[DRY RUN] Would add: ${added_desc}"
    count_added=$(( count_added + 1 ))
    continue
  fi

  # @decision DEC-LIFECYCLE-005
  # put-bucket-lifecycle-configuration replaces the ENTIRE config atomically.
  # We pass the merged rules (existing + new) to avoid silently dropping user rules.
  # Exit code is checked directly — not string-matched on output — per the spec.
  put_err=""
  if put_err=$(aws s3api put-bucket-lifecycle-configuration \
    --bucket "$bucket" \
    --region "$region" \
    --lifecycle-configuration "$full_config" \
    2>&1); then
    echo "[OK] ${bucket} (${region}): Added cost-control rule (${added_desc})"
    _record_result "$bucket" "$region" "Added cost-control rule (${added_desc})"
    count_added=$(( count_added + 1 ))
  else
    # Extract the error code for a cleaner summary line. This is done with a
    # bash regex instead of a grep pipeline: under `set -e` + pipefail a grep
    # that finds no "(Code)" token fails the assignment and kills the script
    # before the PutFailed fallback can apply.
    if [[ "$put_err" =~ $err_code_re ]]; then
      err_code="${BASH_REMATCH[1]}"
    else
      err_code="PutFailed"
    fi
    echo "[!!] ${bucket} (${region}): Error - ${err_code}"
    _record_result "$bucket" "$region" "Error: ${err_code}"
    count_error=$(( count_error + 1 ))
  fi
done <<< "$BUCKET_NAMES"
# ---------------------------------------------------------------------------
# Summary report
# ---------------------------------------------------------------------------
# Every processed bucket lands in exactly one of the three counters.
total_buckets=$(( count_added + count_covered + count_error ))

# Report header: who ran it, when, and in which mode.
echo ""
echo "========================================"
echo "S3 Lifecycle Cost Control - Summary"
echo "========================================"
printf "AWS Account: %s (%s)\n" "$ACCOUNT_ID" "$ACCOUNT_ALIAS"
printf "Identity ARN: %s\n" "$IDENTITY_ARN"
printf "Timestamp: %s\n" "$TIMESTAMP"
if [[ "$MODE" == "1" ]]; then
  printf "Mode: 1 — DRY RUN\n"
elif [[ "$MODE" == "2" ]]; then
  printf "Mode: 2 — Apply both rules\n"
elif [[ "$MODE" == "3" ]]; then
  printf "Mode: 3 — Apply incomplete multipart uploads only\n"
fi

# Totals per outcome.
echo ""
printf "Buckets processed: %d\n" "$total_buckets"
printf " Added cost-control rule: %d\n" "$count_added"
printf " Already covered: %d\n" "$count_covered"
printf " Errors: %d\n" "$count_error"

# Per-bucket table, one row per entry in the parallel RESULTS_* arrays.
if (( ${#RESULTS_BUCKET[@]} > 0 )); then
  echo ""
  printf " %-40s %-15s %s\n" "BUCKET NAME" "REGION" "ACTION"
  printf " %-40s %-15s %s\n" "----------------------------------------" "---------------" "------------------------------"
  for (( i = 0; i < ${#RESULTS_BUCKET[@]}; i++ )); do
    printf " %-40s %-15s %s\n" \
      "${RESULTS_BUCKET[$i]}" \
      "${RESULTS_REGION[$i]}" \
      "${RESULTS_ACTION[$i]}"
  done
fi
echo "========================================"

# Exit status mirrors the error counter: 1 if any bucket failed, else 0.
(( count_error == 0 )) || exit 1
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment