Skip to content

Instantly share code, notes, and snippets.

@thepushkarp
Last active October 29, 2024 08:38
Show Gist options
  • Save thepushkarp/ad8d5818fa364cee04eaaa5836adf706 to your computer and use it in GitHub Desktop.
Save thepushkarp/ad8d5818fa364cee04eaaa5836adf706 to your computer and use it in GitHub Desktop.
Dependency cleanup script for Python and Node.js projects
#!/usr/bin/env python3
"""
Dependency cleanup script for Python and Node.js projects.

Finds and removes Python virtual environments (directories named ``venv``) and
Node.js ``node_modules`` directories whose own modification time is older than
a configurable number of months (default: 3).

Requirements:
- Python 3.6+ (uses f-strings)
- No external dependencies required (uses only standard library)

Usage:
    python dep_cleaner.py /path/to/projects                             # Basic usage (deletes stale directories)
    python dep_cleaner.py /path/to/projects --dry-run                   # Preview only, nothing is deleted
    python dep_cleaner.py /path/to/projects --respect-project-activity  # Keep deps of recently active projects
    python dep_cleaner.py /path/to/projects --deps venv                 # Clean only virtual environments
    python dep_cleaner.py /path/to/projects --months 6                  # Change time threshold
"""
import os
import shutil
import time
from datetime import datetime
import argparse
from typing import Optional, Tuple, List, Dict
from enum import Enum
class DependencyType(Enum):
    """Kinds of dependency directory this script can clean up.

    Each member's value is the literal directory name that is searched for.
    """

    VENV = "venv"
    NODE_MODULES = "node_modules"

    def __str__(self) -> str:
        # Render as the bare directory name so a member can be dropped straight
        # into paths and user-facing messages.
        return self.value
def format_size(size_bytes: int) -> str:
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, and is always shown with one decimal place,
    e.g. ``1536`` becomes ``"1.5KB"``.
    """
    bounded_units = ("B", "KB", "MB", "GB")
    amount = size_bytes
    step = 0
    # "not <" (rather than ">=") keeps the threshold comparison exactly as is.
    while step < len(bounded_units) and not amount < 1024:
        amount /= 1024
        step += 1
    # Anything still too large after GB is reported in TB, however big.
    suffix = bounded_units[step] if step < len(bounded_units) else "TB"
    return f"{amount:.1f}{suffix}"
def get_directory_size(directory: str) -> int:
    """Return the total size in bytes of all regular files under a directory.

    The whole tree below ``directory`` is traversed. Symbolic links are neither
    followed nor counted, so a link that points back into the same tree is not
    counted twice and a link that points outside the tree does not inflate the
    result.

    Args:
        directory: Path of the directory to measure.

    Returns:
        Sum of the sizes of the regular files found. Entries and directories
        that cannot be read are skipped; an unreadable or missing ``directory``
        therefore yields 0.
    """
    total_size = 0
    # Explicit stack instead of recursion: no recursion-depth limit on deep trees.
    pending = [directory]

    while pending:
        current = pending.pop()
        try:
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        if entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            total_size += entry.stat(follow_symlinks=False).st_size
                    except OSError:
                        # Entry vanished or is unreadable; keep what we have so far.
                        continue
        except OSError:
            # Directory could not be listed; skip it but keep the running total.
            continue

    return total_size
def check_project_activity(project_dir: str, dep_dir: str, seconds_threshold: int) -> Tuple[bool, Optional[str], float]:
    """
    Report whether any file in a project was modified within the threshold period.

    The dependency directory itself is pruned from the walk, so nothing inside
    it counts as project activity.

    Args:
        project_dir: Path to the project directory
        dep_dir: Path to the dependency directory to exclude
        seconds_threshold: Time threshold in seconds

    Returns:
        Tuple of (is_active, latest_file, latest_time). latest_file is the most
        recently modified file that was found (None when there was none) and
        latest_time is its modification time (0 when there was none). If walking
        the project raises OSError, a message is printed and (False, None, 0)
        is returned.
    """
    newest_mtime = 0
    newest_path = None
    excluded = os.path.normpath(dep_dir)
    now = time.time()

    try:
        for folder, subdirs, filenames in os.walk(project_dir):
            # Prune in place so os.walk never descends into the dependency directory.
            subdirs[:] = [
                name for name in subdirs
                if os.path.normpath(os.path.join(folder, name)) != excluded
            ]

            for name in filenames:
                candidate = os.path.join(folder, name)
                try:
                    modified = os.path.getmtime(candidate)
                except OSError:
                    # The file's mtime cannot be read; leave it out of the comparison.
                    continue
                if modified > newest_mtime:
                    newest_mtime = modified
                    newest_path = candidate
    except OSError as e:
        print(f"Error accessing {project_dir}: {e}")
        return False, None, 0

    return (now - newest_mtime) <= seconds_threshold, newest_path, newest_mtime
def cleanup_dependencies(
    start_dir: str,
    months: int = 3,
    dry_run: bool = True,
    dep_types: Optional[List[DependencyType]] = None,
    respect_project_activity: bool = False
) -> None:
    """
    Find and remove dependency directories that haven't been modified in the specified months.

    The tree below ``start_dir`` is walked; every directory whose name equals
    the string form of one of ``dep_types`` is a candidate and is never
    descended into. A candidate is stale when its own modification time is
    older than the threshold (a month is approximated as 30 days). Stale
    candidates are removed (or only reported when ``dry_run`` is true), and a
    per-type summary is printed at the end.

    Args:
        start_dir: Directory to start the search from
        months: Number of months of inactivity before removing
        dry_run: If True, only print what would be done without actually removing
        dep_types: List of dependency types to clean up (all types when None);
            repeated entries are processed once
        respect_project_activity: If True, don't remove deps if project has recent activity
    """
    if dep_types is None:
        dep_types = list(DependencyType)
    else:
        # De-duplicate (keeping order) so a repeated type is not summarised twice.
        dep_types = list(dict.fromkeys(dep_types))

    seconds_threshold = months * 30 * 24 * 60 * 60  # Approximate months to seconds
    current_time = time.time()
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # Track statistics for each dependency type
    stats: Dict[DependencyType, Dict[str, int]] = {
        dep_type: {"count": 0, "space": 0} for dep_type in dep_types
    }

    for root, dirs, _ in os.walk(start_dir):
        for dep_type in dep_types:
            dep_name = str(dep_type)
            if dep_name not in dirs:
                continue

            # Prune in place so os.walk never recurses into a dependency directory.
            dirs.remove(dep_name)
            dep_path = os.path.join(root, dep_name)

            try:
                # Just check the directory's own modification time
                latest_time = os.path.getmtime(dep_path)
            except OSError:
                continue

            # Recent dependency directories are never touched, so skip them
            # before doing the (potentially expensive) project-activity walk.
            if current_time - latest_time <= seconds_threshold:
                continue

            if respect_project_activity:
                project_dir = os.path.dirname(dep_path)
                project_active, project_latest_file, project_latest_time = check_project_activity(
                    project_dir, dep_path, seconds_threshold
                )
                if project_active:
                    print(f"\nSkipping {dep_type} directory (project has recent activity):")
                    print(f" Path: {dep_path}")
                    print(f" Most recent project activity: {datetime.fromtimestamp(project_latest_time).strftime(timestamp_format)}")
                    if project_latest_file:
                        print(f" Most recently modified file: {os.path.relpath(project_latest_file, project_dir)}")
                    continue

            dep_size = get_directory_size(dep_path)
            last_modified = datetime.fromtimestamp(latest_time).strftime(timestamp_format)

            if dry_run:
                print(f"\nWould remove {dep_type} directory:")
            else:
                print(f"\nRemoving {dep_type} directory:")
            print(f" Path: {dep_path}")
            print(f" Size: {format_size(dep_size)}")
            print(f" Last modified: {last_modified}")

            if not dry_run:
                try:
                    shutil.rmtree(dep_path)
                except OSError as e:
                    # A failed removal is reported but not counted in the summary.
                    print(f" ✗ Error removing: {e}")
                    continue
                print(" ✓ Removed successfully")

            stats[dep_type]["space"] += dep_size
            stats[dep_type]["count"] += 1

    # Print summary
    print(f"\n{'Dry run summary:' if dry_run else 'Operation complete:'}")
    action = "Would remove" if dry_run else "Removed"
    for dep_type in dep_types:
        count = stats[dep_type]["count"]
        space = stats[dep_type]["space"]
        print(f"{action} {count} {dep_type} directories")
        print(f"{action} {format_size(space)} from {dep_type} directories")
def main():
    """Parse command-line arguments and run the dependency cleanup.

    Invalid input (a negative ``--months`` value, or a target path that is not
    a directory) is reported through ``parser.error``, which prints the usage
    message to stderr and exits with a non-zero status, so that calling scripts
    can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Clean up Python virtual environments and Node.js modules that haven't been modified recently"
    )
    parser.add_argument(
        "directory",
        help="Directory to search for dependency directories"
    )
    parser.add_argument(
        "-m", "--months",
        type=int,
        default=3,
        help="Remove directories not modified in this many months (default: 3)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be removed without actually removing anything"
    )
    parser.add_argument(
        "--deps",
        type=DependencyType,
        choices=list(DependencyType),
        nargs="+",
        help="Specify which dependency types to clean up (default: all)"
    )
    parser.add_argument(
        "--respect-project-activity",
        action="store_true",
        help="Don't remove dependencies if their parent project has had recent activity"
    )

    args = parser.parse_args()

    # A negative threshold would make every dependency directory count as
    # stale, so refuse it rather than delete everything.
    if args.months < 0:
        parser.error("--months must not be negative")

    if not os.path.isdir(args.directory):
        parser.error(f"{args.directory} is not a directory")

    cleanup_dependencies(
        args.directory,
        months=args.months,
        dry_run=args.dry_run,
        dep_types=args.deps,
        respect_project_activity=args.respect_project_activity,
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment