Skip to content

Instantly share code, notes, and snippets.

@thepushkarp
Last active October 29, 2024 09:05
Show Gist options
  • Save thepushkarp/1e105b9a04de83b45a7d7ad70d1fa938 to your computer and use it in GitHub Desktop.
Save thepushkarp/1e105b9a04de83b45a7d7ad70d1fa938 to your computer and use it in GitHub Desktop.
Deletes unnecessary folders that take huge space. Useful when moving directories.
#!/usr/bin/env python3
"""
Development Project Folder Cleaner.
A utility script to safely clean up common unnecessary folders in development projects
such as node_modules, build directories, cache folders, and virtual environments.
Includes safety checks, dry run mode, and detailed logging.
Dependencies:
- Python 3.6 or higher
- No external packages required (uses only standard library)
Usage:
Basic:
python delete_unnecessary_folders.py
With options:
python delete_unnecessary_folders.py [path] [options]
Options:
--dry-run Simulate deletion without removing files
--min-age HOURS Only delete folders older than specified hours
--exclude PATHS Paths to exclude from cleanup (space-separated)
--log-file FILE Path to log file
--folders NAMES Specific folders to target (space-separated)
Examples:
python delete_unnecessary_folders.py # Clean current directory
python delete_unnecessary_folders.py --dry-run # Simulate cleanup
python delete_unnecessary_folders.py /path/to/project # Clean specific path
python delete_unnecessary_folders.py --min-age 24 # Only delete folders older than 24h
python delete_unnecessary_folders.py --exclude /path/to/keep # Exclude specific paths
python delete_unnecessary_folders.py --folders node_modules build # Target specific folders
Author: Generated by Claude
License: MIT
"""
import os
import shutil
import argparse
import logging
from typing import Set, Optional
from datetime import datetime
import sys
from pathlib import Path
import time
# Directory names that are targeted for deletion when the caller does not
# supply an explicit list. Matching is done on the bare folder name.
DEFAULT_FOLDERS = {
    # Dependency trees and Python virtual environments
    'node_modules', 'venv', 'env',
    # Build and bundler output
    '.next', 'build', 'dist',
    # Tool caches
    '__pycache__', '.pytest_cache', '.mypy_cache', '.sass-cache', '.parcel-cache',
    # Editor / IDE settings
    '.vscode', '.idea',
    # Miscellaneous scratch and report folders
    'temp', 'coverage',
}
# POSIX system locations that must never be deleted. FolderCleaner always
# merges these into its exclusion set, whatever the caller passes in.
PROTECTED_PATHS = {
    '/',
    '/bin', '/sbin', '/lib', '/lib64', '/usr',
    '/boot', '/etc', '/opt', '/var',
    '/home', '/root',
    '/proc', '/sys',
}
class FolderCleaner:
    """Walk a directory tree and delete well-known disposable project folders.

    A folder is a deletion candidate when its bare name is in
    ``target_folders``. Before anything is removed the candidate must pass
    the safety checks in :meth:`is_safe_to_delete` and the age check in
    :meth:`is_folder_old_enough`.

    Attributes:
        target_folders: Folder names that are deletion candidates.
        dry_run: When True, candidates are only logged, never removed.
        min_age_hours: Minimum folder age (by mtime) required for deletion.
        exclude_paths: The caller's excludes plus ``PROTECTED_PATHS``, as given.
        stats: Counters with the keys ``deleted``, ``errors``, ``space_saved``.
        logger: The shared ``'FolderCleaner'`` logger.
    """

    def __init__(
        self,
        target_folders: Optional[Set[str]] = None,
        dry_run: bool = False,
        min_age_hours: float = 0,
        exclude_paths: Optional[Set[str]] = None,
        log_file: Optional[str] = None
    ):
        """
        Initialize the FolderCleaner with the given configuration.

        Args:
            target_folders: Set of folder names to delete (defaults to
                ``DEFAULT_FOLDERS`` when empty or None)
            dry_run: If True, only simulate deletion
            min_age_hours: Minimum age of folders to delete (in hours)
            exclude_paths: Set of paths to exclude from cleaning; each one
                excludes the path itself and everything beneath it
            log_file: Path to log file (if None, log to console only)
        """
        # Copy the inputs so neither the caller's sets nor the module-level
        # defaults are aliased or mutated by this instance.
        self.target_folders = set(target_folders) if target_folders else set(DEFAULT_FOLDERS)
        self.dry_run = dry_run
        self.min_age_hours = min_age_hours
        user_excludes = set(exclude_paths) if exclude_paths else set()
        self.exclude_paths = user_excludes | set(PROTECTED_PATHS)

        # Normalised views used for the actual comparisons. They are kept
        # separate because the two kinds of path are matched differently:
        # user excludes cover a whole subtree, protected paths match exactly.
        self._excluded_paths = frozenset(self._normalize(p) for p in user_excludes)
        self._protected_paths = frozenset(self._normalize(p) for p in PROTECTED_PATHS)

        self.stats = {'deleted': 0, 'errors': 0, 'space_saved': 0}
        # Setup logging
        self.setup_logging(log_file)

    @staticmethod
    def _normalize(path: str) -> str:
        """Return *path* as an absolute, case-normalised string for comparisons."""
        return os.path.normcase(os.path.abspath(path))

    @staticmethod
    def _is_within(path: str, parent: str) -> bool:
        """Return True if *path* equals *parent* or lies beneath it.

        Both arguments must already be normalised. The comparison respects
        path-component boundaries, so ``/usrlocal`` is not inside ``/usr``.
        """
        if path == parent:
            return True
        prefix = parent if parent.endswith(os.sep) else parent + os.sep
        return path.startswith(prefix)

    def _is_pruned(self, path: str) -> bool:
        """Return True if *path* is a protected path or inside a user exclude."""
        key = self._normalize(path)
        if key in self._protected_paths:
            return True
        return any(self._is_within(key, excluded) for excluded in self._excluded_paths)

    def setup_logging(self, log_file: Optional[str]) -> None:
        """Configure logging to both file and console.

        The logger is shared by name across instances, so handlers are only
        added when an equivalent one is not attached yet; otherwise every
        new instance would duplicate each log line.
        """
        self.logger = logging.getLogger('FolderCleaner')
        self.logger.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        existing = list(self.logger.handlers)

        # Console handler. FileHandler subclasses StreamHandler, hence the
        # exact type comparison instead of isinstance.
        if not any(type(h) is logging.StreamHandler for h in existing):
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)

        # File handler (if specified)
        if log_file:
            log_path = os.path.abspath(log_file)
            already_attached = any(
                isinstance(h, logging.FileHandler) and h.baseFilename == log_path
                for h in existing
            )
            if not already_attached:
                file_handler = logging.FileHandler(log_file)
                file_handler.setFormatter(formatter)
                self.logger.addHandler(file_handler)

    def get_folder_size(self, path: str) -> int:
        """Calculate the total size of a folder in bytes.

        Symbolic links are counted as the link itself, not their target,
        because deleting the folder only removes the link. Unreadable
        directories are logged and skipped; unreadable files are skipped.
        """
        def report(error: OSError) -> None:
            # os.walk swallows listing errors unless given an onerror hook.
            self.logger.warning(f"Error calculating size of {path}: {error}")

        total_size = 0
        for dirpath, _dirnames, filenames in os.walk(path, onerror=report):
            for filename in filenames:
                try:
                    total_size += os.lstat(os.path.join(dirpath, filename)).st_size
                except OSError:
                    continue
        return total_size

    def is_folder_old_enough(self, path: str) -> bool:
        """Check if the folder is older than min_age_hours.

        Age is taken from the folder's own modification time. A
        non-positive ``min_age_hours`` disables the check. If the
        modification time cannot be read the folder is treated as too young.
        """
        if self.min_age_hours <= 0:
            return True
        try:
            mtime = os.path.getmtime(path)
        except OSError as e:
            self.logger.warning(f"Error checking age of {path}: {e}")
            return False
        age_hours = (time.time() - mtime) / 3600
        return age_hours >= self.min_age_hours

    def is_safe_to_delete(self, path: str) -> bool:
        """
        Check if it's safe to delete the given path.

        A path is rejected when it is exactly one of the protected system
        paths, when it is (or lies beneath) a user-excluded path, when it
        is a symlink, when it is not an existing directory, or when the
        process lacks write permission on it.

        Returns:
            bool: True if safe to delete, False otherwise
        """
        # Protected paths are matched exactly. Prefix-matching them would
        # reject every absolute path, because '/' is one of them.
        if self._is_pruned(path):
            return False

        abs_path = os.path.abspath(path)
        try:
            # Never remove through a link; the target may live elsewhere.
            if os.path.islink(abs_path):
                return False
            if not os.path.isdir(abs_path):
                return False
            return os.access(abs_path, os.W_OK)
        except OSError:
            return False

    def delete_folder(self, path: str) -> None:
        """Safely delete a folder and update statistics.

        In dry-run mode the folder is only logged and the statistics are
        left untouched. A failed removal is logged and counted in
        ``stats['errors']`` instead of being raised.
        """
        size = self.get_folder_size(path)
        if self.dry_run:
            self.logger.info(f"[DRY RUN] Would delete: {path} (Size: {size / 1024 / 1024:.2f} MB)")
            return
        try:
            shutil.rmtree(path)
        except OSError as e:
            self.stats['errors'] += 1
            self.logger.error(f"Error deleting {path}: {e}")
            return
        self.stats['deleted'] += 1
        self.stats['space_saved'] += size
        self.logger.info(f"Successfully deleted: {path} (Size: {size / 1024 / 1024:.2f} MB)")

    def _handle_target(self, full_path: str) -> None:
        """Run the safety and age checks on one matched folder, then delete it."""
        if not self.is_safe_to_delete(full_path):
            self.logger.warning(f"Skipping unsafe path: {full_path}")
            return
        if not self.is_folder_old_enough(full_path):
            self.logger.info(f"Skipping folder not old enough: {full_path}")
            return
        self.delete_folder(full_path)

    def clean_folders(self, start_path: str = '.') -> None:
        """
        Main method to clean unnecessary folders starting from the given path.

        Protected and excluded directories are never entered. A directory
        whose name matches a target is handled once and never entered
        either, so a kept (too young or unsafe) folder keeps its contents
        and a dry run reports the same folders a real run would delete.
        The summary is always printed, even after an interruption.

        Args:
            start_path: The root path to start cleaning from
        """
        self.logger.info(f"Starting folder cleanup from: {os.path.abspath(start_path)}")
        self.logger.info(f"Target folders: {', '.join(sorted(self.target_folders))}")
        self.logger.info(f"Dry run: {self.dry_run}")
        try:
            for root, dirs, _ in os.walk(start_path, topdown=True):
                descend = []
                for dir_name in dirs:
                    full_path = os.path.join(root, dir_name)
                    if self._is_pruned(full_path):
                        continue
                    if dir_name in self.target_folders:
                        self._handle_target(full_path)
                        continue
                    descend.append(dir_name)
                # With topdown=True, os.walk only visits what is left in dirs.
                dirs[:] = descend
        except KeyboardInterrupt:
            self.logger.warning("Cleanup interrupted by user")
        except Exception as e:
            self.logger.error(f"Unexpected error during cleanup: {e}")
        finally:
            self.print_summary()

    def print_summary(self) -> None:
        """Print summary of the cleanup operation."""
        self.logger.info("\nCleanup Summary:")
        self.logger.info(f"Folders deleted: {self.stats['deleted']}")
        self.logger.info(f"Space saved: {self.stats['space_saved'] / 1024 / 1024:.2f} MB")
        self.logger.info(f"Errors encountered: {self.stats['errors']}")
def main(argv: Optional[list] = None) -> None:
    """Parse command-line options and run one cleanup pass.

    Args:
        argv: Argument strings to parse instead of ``sys.argv[1:]``. Leave
            as None for normal command-line use.

    Raises:
        SystemExit: Raised by argparse (exit status 2) for invalid options
            or when the start path is not an existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Clean unnecessary folders from development projects",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        'path',
        nargs='?',
        default='.',
        help='Starting path for cleanup'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Simulate deletion without actually removing files'
    )
    parser.add_argument(
        '--min-age',
        type=float,
        default=0,
        help='Minimum age of folders to delete (in hours)'
    )
    parser.add_argument(
        '--exclude',
        nargs='*',
        default=[],
        help='Paths to exclude from cleanup'
    )
    parser.add_argument(
        '--log-file',
        help='Path to log file'
    )
    parser.add_argument(
        '--folders',
        nargs='*',
        help='Specific folders to target (defaults to predefined list)'
    )
    args = parser.parse_args(argv)

    # os.walk yields nothing for a missing path, so a typo would otherwise
    # end in a misleading "0 folders deleted" summary.
    if not os.path.isdir(args.path):
        parser.error(f"start path is not a directory: {args.path}")

    # Create cleaner instance; an empty --folders list falls back to the defaults
    cleaner = FolderCleaner(
        target_folders=set(args.folders) if args.folders else None,
        dry_run=args.dry_run,
        min_age_hours=args.min_age,
        exclude_paths=set(args.exclude),
        log_file=args.log_file
    )
    # Run cleanup
    cleaner.clean_folders(args.path)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment