#!/usr/bin/env python3
"""
Script to vacuum and optimize all tables in a PostgreSQL 14 database.

This script connects to a PostgreSQL database and performs the following operations:

1. Lists all tables in the selected schema (default: public), honouring the
   --include / --exclude filters
2. Runs VACUUM ANALYZE on each table to reclaim space and update statistics
3. Optionally (--reindex) runs REINDEX on each table to rebuild indexes
4. Optionally (--vacuum-full) runs VACUUM FULL ANALYZE instead of the plain
   VACUUM ANALYZE for more aggressive space reclamation
"""
|
|
|
import argparse
import contextlib
import sys
import time
from datetime import datetime

import psycopg2
|
|
|
|
|
def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional sequence of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, so existing zero-argument calls are unaffected.

    Returns:
        argparse.Namespace with the attributes ``host``, ``port``, ``dbname``,
        ``user``, ``password``, ``schema``, ``vacuum_full``, ``reindex``,
        ``exclude``, ``include``, ``timeout`` and ``verbose``.

    Raises:
        SystemExit: Raised by argparse for unknown or missing arguments, and
            for a negative ``--timeout``.
    """
    parser = argparse.ArgumentParser(
        description='Vacuum and optimize all tables in a PostgreSQL database.')

    # Connection parameters
    parser.add_argument('--host', default='localhost', help='Database host (default: localhost)')
    parser.add_argument('--port', type=int, default=5432, help='Database port (default: 5432)')
    parser.add_argument('--dbname', required=True, help='Database name')
    parser.add_argument('--user', required=True, help='Database user')
    parser.add_argument('--password', help='Database password (or use PGPASSWORD env var)')

    # Operation parameters
    parser.add_argument('--schema', default='public', help='Schema to process (default: public)')
    parser.add_argument('--vacuum-full', action='store_true',
                        help='Run VACUUM FULL (slower but reclaims more space)')
    parser.add_argument('--reindex', action='store_true',
                        help='Rebuild all indexes (can be time-consuming)')
    parser.add_argument('--exclude', nargs='+', default=[],
                        help='Tables to exclude (space-separated list)')
    parser.add_argument('--include', nargs='+', default=[],
                        help='Only process these tables (space-separated list)')
    parser.add_argument('--timeout', type=int, default=600,
                        help='Statement timeout in seconds (default: 600)')
    parser.add_argument('--verbose', action='store_true',
                        help='Print detailed progress information')

    args = parser.parse_args(argv)

    # A negative timeout can never be a valid statement timeout; reject it
    # here with a usage error rather than letting it fail later on.
    if args.timeout < 0:
        parser.error('--timeout must be zero or a positive number of seconds')

    return args
|
|
|
|
|
def connect_to_db(args):
    """Connect to the PostgreSQL database.

    Opens a psycopg2 connection from the parsed arguments, switches it to
    autocommit mode and applies ``args.timeout`` (seconds) as the session's
    ``statement_timeout``.

    Args:
        args: Parsed arguments providing ``host``, ``port``, ``dbname``,
            ``user``, ``password`` and ``timeout``.

    Returns:
        The open psycopg2 connection, in autocommit mode.

    Raises:
        SystemExit: With status 1 when connecting or configuring the session
            fails; the error is reported on stderr first.
    """
    conn_params = {
        'host': args.host,
        'port': args.port,
        'dbname': args.dbname,
        'user': args.user,
    }

    # Only pass a password when one was supplied on the command line.
    if args.password:
        conn_params['password'] = args.password

    conn = None
    try:
        conn = psycopg2.connect(**conn_params)
        # VACUUM cannot run inside a transaction block, so every statement
        # issued on this connection has to autocommit.
        conn.autocommit = True

        # statement_timeout is expressed in milliseconds.
        with conn.cursor() as cur:
            cur.execute(f"SET statement_timeout = {int(args.timeout) * 1000}")
    except psycopg2.Error as e:
        # Do not leave a half-configured connection open behind us.
        if conn is not None:
            conn.close()
        print(f"Error connecting to the database: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Connected to PostgreSQL database: {args.dbname} on {args.host}:{args.port}")
    return conn
|
|
|
|
|
def get_tables(conn, schema, include_tables, exclude_tables):
    """Get list of tables in the specified schema.

    Args:
        conn: Open database connection.
        schema: Name of the schema whose tables are listed.
        include_tables: Table names to keep. When empty, every table in the
            schema is a candidate.
        exclude_tables: Table names to drop. Applied even when
            ``include_tables`` is given, so a name present in both lists is
            excluded.

    Returns:
        Table names sorted as returned by the query (``ORDER BY tablename``),
        or an empty list when the catalog query fails (the error is printed).
    """
    try:
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT tablename
                FROM pg_tables
                WHERE schemaname = %s
                ORDER BY tablename
                """,
                (schema,),
            )
            all_tables = [row[0] for row in cur.fetchall()]
    except psycopg2.Error as e:
        print(f"Error fetching tables: {e}")
        return []

    # Sets give O(1) membership tests and tolerate None for either filter.
    included = set(include_tables or ())
    excluded = set(exclude_tables or ())

    return [
        name for name in all_tables
        if (not included or name in included) and name not in excluded
    ]
|
|
|
|
|
def get_table_sizes(conn, schema, tables):
    """Get size information for tables.

    Args:
        conn: Open database connection.
        schema: Schema that contains the tables.
        tables: Iterable of table names within ``schema``.

    Returns:
        Dict mapping each table name to a dict with the keys ``'total'``,
        ``'table'`` and ``'index'``. The values are the strings produced by
        ``pg_size_pretty`` for ``pg_total_relation_size``, ``pg_table_size``
        and ``pg_indexes_size`` respectively. Tables whose lookup fails are
        reported and left out of the result.
    """
    sizes = {}

    # Resolve the relation once per row via regclass instead of scanning
    # pg_class by bare relname, which is not schema-qualified.
    query = """
        SELECT
            pg_size_pretty(pg_total_relation_size(rel)) AS total_size,
            pg_size_pretty(pg_table_size(rel)) AS table_size,
            pg_size_pretty(pg_indexes_size(rel)) AS index_size
        FROM (SELECT %s::regclass AS rel) AS target
    """

    with conn.cursor() as cur:
        for table in tables:
            # Double-quote both identifiers (doubling embedded quotes) so
            # mixed-case or otherwise unusual names resolve correctly.
            qualified = ".".join(
                '"' + part.replace('"', '""') + '"' for part in (schema, table)
            )
            try:
                cur.execute(query, (qualified,))
                result = cur.fetchone()
            except psycopg2.Error as e:
                # Keep going: one bad table should not hide the other sizes.
                print(f"Error getting table sizes: {e}")
                continue

            if result:
                sizes[table] = {
                    'total': result[0],
                    'table': result[1],
                    'index': result[2],
                }

    return sizes
|
|
|
|
|
def vacuum_table(conn, schema, table, full_vacuum, verbose):
    """Run VACUUM ANALYZE (or VACUUM FULL ANALYZE) on a table.

    Args:
        conn: Open database connection.
        schema: Schema that contains the table.
        table: Name of the table to vacuum.
        full_vacuum: When true, issue ``VACUUM FULL ANALYZE`` instead of
            ``VACUUM ANALYZE``.
        verbose: Currently unused; kept so existing callers keep working.

    Returns:
        True when the statement succeeded, False when the database reported
        an error (which is printed).
    """
    table_name = f"{schema}.{table}"
    # Utility statements cannot take bind parameters, so the identifiers are
    # double-quoted (embedded quotes doubled) before being placed in the SQL.
    quoted_name = ".".join(
        '"' + part.replace('"', '""') + '"' for part in (schema, table)
    )
    command = "VACUUM FULL ANALYZE" if full_vacuum else "VACUUM ANALYZE"

    # Monotonic clock: the elapsed time is immune to wall-clock adjustments.
    start_time = time.monotonic()

    try:
        with conn.cursor() as cur:
            print(f"Running {command} on {table_name}...")
            cur.execute(f"{command} {quoted_name}")
    except psycopg2.Error as e:
        print(f"Error vacuuming table {table_name}: {e}")
        return False

    elapsed = time.monotonic() - start_time
    print(f"✓ Completed in {elapsed:.2f} seconds")
    return True
|
|
|
|
|
def reindex_table(conn, schema, table, verbose):
    """Rebuild all indexes on a table.

    Args:
        conn: Open database connection.
        schema: Schema that contains the table.
        table: Name of the table whose indexes are rebuilt.
        verbose: Currently unused; kept so existing callers keep working.

    Returns:
        True when ``REINDEX TABLE`` succeeded, False when the database
        reported an error (which is printed).
    """
    table_name = f"{schema}.{table}"
    # Utility statements cannot take bind parameters, so the identifiers are
    # double-quoted (embedded quotes doubled) before being placed in the SQL.
    quoted_name = ".".join(
        '"' + part.replace('"', '""') + '"' for part in (schema, table)
    )

    # Monotonic clock: the elapsed time is immune to wall-clock adjustments.
    start_time = time.monotonic()

    try:
        with conn.cursor() as cur:
            print(f"Reindexing {table_name}...")
            cur.execute(f"REINDEX TABLE {quoted_name}")
    except psycopg2.Error as e:
        print(f"Error reindexing table {table_name}: {e}")
        return False

    elapsed = time.monotonic() - start_time
    print(f"✓ Completed in {elapsed:.2f} seconds")
    return True
|
|
|
|
|
def analyze_table(conn, schema, table, verbose):
    """Run ANALYZE on a table to update statistics.

    Args:
        conn: Open database connection.
        schema: Schema that contains the table.
        table: Name of the table to analyze.
        verbose: When true, print a progress line before running ANALYZE.

    Returns:
        True when ``ANALYZE`` succeeded, False when the database reported an
        error (which is printed).
    """
    table_name = f"{schema}.{table}"
    # Utility statements cannot take bind parameters, so the identifiers are
    # double-quoted (embedded quotes doubled) before being placed in the SQL.
    quoted_name = ".".join(
        '"' + part.replace('"', '""') + '"' for part in (schema, table)
    )

    try:
        with conn.cursor() as cur:
            if verbose:
                print(f"Updating statistics for {table_name}...")
            cur.execute(f"ANALYZE {quoted_name}")
    except psycopg2.Error as e:
        print(f"Error analyzing table {table_name}: {e}")
        return False

    return True
|
|
|
|
|
def print_summary(table_count, success_count, error_count, start_time):
    """Print summary of operations.

    Args:
        table_count: Number of tables that were processed.
        success_count: Number of operations that succeeded.
        error_count: Number of operations that failed.
        start_time: ``time.time()`` timestamp taken when the run started; the
            total duration is measured from it.
    """
    duration = time.time() - start_time
    rule = "=" * 60

    # Assemble the whole report first and emit it with a single print call.
    report = [
        "\n" + rule,
        "Summary:",
        f" Total tables processed: {table_count}",
        f" Successful operations: {success_count}",
        f" Failed operations: {error_count}",
        f" Total time: {duration:.2f} seconds",
        rule,
    ]
    print("\n".join(report))
|
|
|
|
|
def _print_size_report(heading, sizes):
    """Print ``heading`` followed by one aligned row per table in ``sizes``."""
    print(heading)
    print(f"{'Table':<30} {'Total Size':<12} {'Table Size':<12} {'Index Size':<12}")
    print("-" * 70)
    for table, size in sizes.items():
        print(f"{table:<30} {size['total']:<12} {size['table']:<12} {size['index']:<12}")


def main():
    """Main function to vacuum and optimize PostgreSQL tables.

    Parses the command line, connects to the database, vacuums every selected
    table (and reindexes it when ``--reindex`` is given), optionally prints
    table sizes before and after when ``--verbose`` is given, and finishes
    with a summary. The connection is closed on every exit path, including
    the early return when no tables match and any unexpected exception.
    """
    args = parse_arguments()
    main_start_time = time.time()

    print(f"Starting PostgreSQL vacuum and optimization at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Target database: {args.dbname} on {args.host}:{args.port}")

    # Connect to the database
    conn = connect_to_db(args)

    # closing() guarantees conn.close() runs however this block is left.
    with contextlib.closing(conn):
        # Get list of tables
        tables = get_tables(conn, args.schema, args.include, args.exclude)
        if not tables:
            print(f"No tables found in schema '{args.schema}' matching criteria.")
            return

        print(f"Found {len(tables)} tables to process in schema '{args.schema}'")

        # Get table sizes before optimization
        if args.verbose:
            print("\nGathering table size information before optimization...")
            before_sizes = get_table_sizes(conn, args.schema, tables)
            _print_size_report("\nCurrent table sizes:", before_sizes)
            print()

        # Counters track individual operations (vacuum and reindex are
        # counted separately), not tables.
        success_count = 0
        error_count = 0

        for i, table in enumerate(tables, 1):
            print(f"\n[{i}/{len(tables)}] Processing {args.schema}.{table}")

            # Vacuum the table
            if vacuum_table(conn, args.schema, table, args.vacuum_full, args.verbose):
                success_count += 1
            else:
                error_count += 1

            # Reindex if requested
            if args.reindex:
                if reindex_table(conn, args.schema, table, args.verbose):
                    success_count += 1
                else:
                    error_count += 1

        # Get table sizes after optimization
        if args.verbose:
            print("\nGathering table size information after optimization...")
            after_sizes = get_table_sizes(conn, args.schema, tables)
            _print_size_report("\nTable sizes after optimization:", after_sizes)

        # Print summary
        print_summary(len(tables), success_count, error_count, main_start_time)

    print(f"\nOptimization completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
|
|
|
# Script entry point: run the optimization only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()