
@phaneesh
Created April 16, 2025 04:47
PostgreSQL maintenance

Features

  • Runs VACUUM ANALYZE on all tables or selected tables
  • Optional VACUUM FULL for more aggressive space reclamation
  • Optional REINDEX to rebuild all indexes
  • Includes table size reporting before and after optimization
  • Supports filtering tables (include/exclude lists)
  • Provides detailed progress and timing information

How to Use the Script

  • First, install the required dependency:
    pip install psycopg2-binary
  • Run the script with the required parameters:
    python postgres_vacuum.py --dbname your_database --user your_username
  • For more options:
    python postgres_vacuum.py --help
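
These options can be combined. As an illustrative example (host, database, and user are placeholders), a run that aggressively reclaims space, rebuilds indexes, and prints detailed progress would look like this:

    python postgres_vacuum.py --host db.example.com --dbname your_database --user your_username --vacuum-full --reindex --verbose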

Important Command-Line Options

Basic connection options:

  • --host - Database host (default: localhost)
  • --port - Database port (default: 5432)
  • --dbname - Database name (required)
  • --user - Database user (required)
  • --password - Database password (or use PGPASSWORD env var)
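
If --password is omitted, the password can instead be supplied through the standard PGPASSWORD environment variable, which psycopg2/libpq reads automatically. For example (credentials and names are placeholders):

    PGPASSWORD=secret python postgres_vacuum.py --dbname your_database --user your_username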

Optimization options:

  • --vacuum-full - Run VACUUM FULL (reclaims more space but locks tables)
  • --reindex - Rebuild all indexes
  • --schema - Schema to process (default: public)
  • --timeout - Statement timeout in seconds (default: 600)
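
For example, a heavier maintenance pass against a non-default schema with a 30-minute statement timeout might look like this (schema name and values are placeholders):

    python postgres_vacuum.py --dbname your_database --user your_username --schema reporting --vacuum-full --timeout 1800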

Table selection:

  • --include - Only process these tables (space-separated list)
  • --exclude - Tables to exclude (space-separated list)
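
Both options take a space-separated list, and the script ignores --exclude whenever --include is supplied. For example (table names are placeholders):

    python postgres_vacuum.py --dbname your_database --user your_username --include orders order_items
    python postgres_vacuum.py --dbname your_database --user your_username --exclude audit_log sessions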

Output control:

  • --verbose - Print detailed progress information

postgres_vacuum.py

#!/usr/bin/env python3
"""
Script to vacuum and optimize all tables in a PostgreSQL 14 database.
This script connects to a PostgreSQL database and performs the following operations:
1. Lists all tables in the database
2. Runs VACUUM ANALYZE on each table to reclaim space and update statistics
3. Optionally runs VACUUM FULL for more aggressive space reclamation
4. Optionally runs REINDEX on each table to rebuild indexes
"""
import argparse
import psycopg2
import sys
import time
from datetime import datetime


def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description='Vacuum and optimize all tables in a PostgreSQL database.')

    # Connection parameters
    parser.add_argument('--host', default='localhost', help='Database host (default: localhost)')
    parser.add_argument('--port', type=int, default=5432, help='Database port (default: 5432)')
    parser.add_argument('--dbname', required=True, help='Database name')
    parser.add_argument('--user', required=True, help='Database user')
    parser.add_argument('--password', help='Database password (or use PGPASSWORD env var)')

    # Operation parameters
    parser.add_argument('--schema', default='public', help='Schema to process (default: public)')
    parser.add_argument('--vacuum-full', action='store_true',
                        help='Run VACUUM FULL (slower but reclaims more space)')
    parser.add_argument('--reindex', action='store_true',
                        help='Rebuild all indexes (can be time-consuming)')
    parser.add_argument('--exclude', nargs='+', default=[],
                        help='Tables to exclude (space-separated list)')
    parser.add_argument('--include', nargs='+', default=[],
                        help='Only process these tables (space-separated list)')
    parser.add_argument('--timeout', type=int, default=600,
                        help='Statement timeout in seconds (default: 600)')
    parser.add_argument('--verbose', action='store_true',
                        help='Print detailed progress information')

    return parser.parse_args()


def connect_to_db(args):
    """Connect to the PostgreSQL database."""
    try:
        # Build connection parameters
        conn_params = {
            'host': args.host,
            'port': args.port,
            'dbname': args.dbname,
            'user': args.user
        }
        # Add password if provided
        if args.password:
            conn_params['password'] = args.password

        # Connect to the database
        conn = psycopg2.connect(**conn_params)
        conn.autocommit = True

        # Set statement timeout (milliseconds)
        with conn.cursor() as cur:
            cur.execute(f"SET statement_timeout = {args.timeout * 1000}")

        print(f"Connected to PostgreSQL database: {args.dbname} on {args.host}:{args.port}")
        return conn
    except Exception as e:
        print(f"Error connecting to the database: {e}")
        sys.exit(1)


def get_tables(conn, schema, include_tables, exclude_tables):
    """Get list of tables in the specified schema."""
    try:
        with conn.cursor() as cur:
            query = """
                SELECT tablename
                FROM pg_tables
                WHERE schemaname = %s
                ORDER BY tablename
            """
            cur.execute(query, (schema,))
            all_tables = [row[0] for row in cur.fetchall()]

        # Filter tables based on include/exclude lists
        if include_tables:
            tables = [t for t in all_tables if t in include_tables]
        else:
            tables = [t for t in all_tables if t not in exclude_tables]
        return tables
    except Exception as e:
        print(f"Error fetching tables: {e}")
        return []


def get_table_sizes(conn, schema, tables):
    """Get size information for tables."""
    sizes = {}
    try:
        with conn.cursor() as cur:
            for table in tables:
                query = """
                    SELECT
                        pg_size_pretty(pg_total_relation_size(%s)) as total_size,
                        pg_size_pretty(pg_relation_size(%s)) as table_size,
                        pg_size_pretty(pg_total_relation_size(%s) - pg_relation_size(%s)) as index_size
                    FROM pg_class
                    WHERE relname = %s
                """
                cur.execute(query, (f"{schema}.{table}", f"{schema}.{table}",
                                    f"{schema}.{table}", f"{schema}.{table}", table))
                result = cur.fetchone()
                if result:
                    sizes[table] = {
                        'total': result[0],
                        'table': result[1],
                        'index': result[2]
                    }
    except Exception as e:
        print(f"Error getting table sizes: {e}")
    return sizes


def vacuum_table(conn, schema, table, full_vacuum, verbose):
    """Run VACUUM ANALYZE on a table."""
    start_time = time.time()
    table_name = f"{schema}.{table}"
    try:
        with conn.cursor() as cur:
            # Choose between VACUUM ANALYZE and VACUUM FULL ANALYZE
            if full_vacuum:
                print(f"Running VACUUM FULL ANALYZE on {table_name}...")
                query = f"VACUUM FULL ANALYZE {table_name}"
            else:
                print(f"Running VACUUM ANALYZE on {table_name}...")
                query = f"VACUUM ANALYZE {table_name}"
            cur.execute(query)

        elapsed = time.time() - start_time
        print(f"✓ Completed in {elapsed:.2f} seconds")
        return True
    except Exception as e:
        print(f"Error vacuuming table {table_name}: {e}")
        return False


def reindex_table(conn, schema, table, verbose):
    """Rebuild all indexes on a table."""
    start_time = time.time()
    table_name = f"{schema}.{table}"
    try:
        with conn.cursor() as cur:
            print(f"Reindexing {table_name}...")
            query = f"REINDEX TABLE {table_name}"
            cur.execute(query)

        elapsed = time.time() - start_time
        print(f"✓ Completed in {elapsed:.2f} seconds")
        return True
    except Exception as e:
        print(f"Error reindexing table {table_name}: {e}")
        return False


def analyze_table(conn, schema, table, verbose):
    """Run ANALYZE on a table to update statistics."""
    table_name = f"{schema}.{table}"
    try:
        with conn.cursor() as cur:
            if verbose:
                print(f"Updating statistics for {table_name}...")
            query = f"ANALYZE {table_name}"
            cur.execute(query)
        return True
    except Exception as e:
        print(f"Error analyzing table {table_name}: {e}")
        return False


def print_summary(table_count, success_count, error_count, start_time):
    """Print summary of operations."""
    elapsed = time.time() - start_time
    print("\n" + "=" * 60)
    print("Summary:")
    print(f"  Total tables processed: {table_count}")
    print(f"  Successful operations: {success_count}")
    print(f"  Failed operations: {error_count}")
    print(f"  Total time: {elapsed:.2f} seconds")
    print("=" * 60)


def main():
    """Main function to vacuum and optimize PostgreSQL tables."""
    args = parse_arguments()
    main_start_time = time.time()

    print(f"Starting PostgreSQL vacuum and optimization at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Target database: {args.dbname} on {args.host}:{args.port}")

    # Connect to the database
    conn = connect_to_db(args)

    # Get list of tables
    tables = get_tables(conn, args.schema, args.include, args.exclude)
    if not tables:
        print(f"No tables found in schema '{args.schema}' matching criteria.")
        return
    print(f"Found {len(tables)} tables to process in schema '{args.schema}'")

    # Get table sizes before optimization
    if args.verbose:
        print("\nGathering table size information before optimization...")
    before_sizes = get_table_sizes(conn, args.schema, tables)

    print("\nCurrent table sizes:")
    print(f"{'Table':<30} {'Total Size':<12} {'Table Size':<12} {'Index Size':<12}")
    print("-" * 70)
    for table, size in before_sizes.items():
        print(f"{table:<30} {size['total']:<12} {size['table']:<12} {size['index']:<12}")
    print()

    # Process tables
    success_count = 0
    error_count = 0
    for i, table in enumerate(tables, 1):
        print(f"\n[{i}/{len(tables)}] Processing {args.schema}.{table}")

        # Vacuum the table
        if vacuum_table(conn, args.schema, table, args.vacuum_full, args.verbose):
            success_count += 1
        else:
            error_count += 1

        # Reindex if requested
        if args.reindex:
            if reindex_table(conn, args.schema, table, args.verbose):
                success_count += 1
            else:
                error_count += 1

    # Get table sizes after optimization
    if args.verbose:
        print("\nGathering table size information after optimization...")
    after_sizes = get_table_sizes(conn, args.schema, tables)

    print("\nTable sizes after optimization:")
    print(f"{'Table':<30} {'Total Size':<12} {'Table Size':<12} {'Index Size':<12}")
    print("-" * 70)
    for table, size in after_sizes.items():
        print(f"{table:<30} {size['total']:<12} {size['table']:<12} {size['index']:<12}")

    # Print summary
    print_summary(len(tables), success_count, error_count, main_start_time)

    # Close the connection
    conn.close()
    print(f"\nOptimization completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


if __name__ == "__main__":
    main()