Last active
October 29, 2024 08:38
-
-
Save thepushkarp/ad8d5818fa364cee04eaaa5836adf706 to your computer and use it in GitHub Desktop.
Dependency cleanup script for Python and Node.js projects
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""
Dependency cleanup script for Python and Node.js projects.

Finds and removes virtualenv (``venv``) and ``node_modules`` directories that
haven't been modified within a specified time period.

Requirements:
    - Python 3.6+ (uses f-strings)
    - No external dependencies required (uses only standard library)

Usage:
    python dep_cleaner.py /path/to/projects                             # Basic usage
    python dep_cleaner.py /path/to/projects --dry-run                   # Preview only
    python dep_cleaner.py /path/to/projects --respect-project-activity  # Check project activity
    python dep_cleaner.py /path/to/projects --deps venv                 # Clean only virtual environments
    python dep_cleaner.py /path/to/projects --months 6                  # Change time threshold
"""
import os | |
import shutil | |
import time | |
from datetime import datetime | |
import argparse | |
from typing import Optional, Tuple, List, Dict | |
from enum import Enum | |
class DependencyType(Enum):
    """Kinds of dependency directory the cleaner can look for.

    Each member's value is the literal directory name that is searched for
    on disk.
    """

    VENV = "venv"
    NODE_MODULES = "node_modules"

    def __str__(self) -> str:
        # Render as the bare directory name so a member can be dropped
        # straight into paths and user-facing messages.
        return self.value
def format_size(size_bytes: int) -> str:
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the unit
    ladder (B, KB, MB, GB) is exhausted; anything still larger is reported
    in TB. One decimal place is always shown, e.g. ``1536`` -> ``"1.5KB"``.
    """
    amount = size_bytes
    ladder = ('B', 'KB', 'MB', 'GB')
    step = 0
    while step < len(ladder):
        if amount < 1024:
            return f"{amount:.1f}{ladder[step]}"
        amount = amount / 1024
        step += 1
    # Still at least 1024 GB after four divisions: express it in terabytes.
    return f"{amount:.1f}TB"
def get_directory_size(directory: str) -> int:
    """Return the combined size in bytes of the regular files under *directory*.

    Subdirectories are traversed recursively. Symbolic links are never
    followed and contribute nothing to the total: their targets may live
    outside the tree (or loop back into it), and removing the tree with
    ``shutil.rmtree`` does not delete link targets, so counting them would
    overstate the space that can be reclaimed.

    Entries that cannot be inspected are skipped. If the directory itself
    cannot be listed the result is 0; if listing fails part-way through,
    the total gathered so far is returned.

    Args:
        directory: Path of the directory to measure.

    Returns:
        Total size in bytes of the files that could be read.
    """
    total_size = 0
    try:
        with os.scandir(directory) as entries:
            for entry in entries:
                # The type checks can raise OSError themselves, so they
                # live inside the per-entry handler: one bad entry must
                # not discard the whole running total.
                try:
                    if entry.is_symlink():
                        continue
                    if entry.is_dir(follow_symlinks=False):
                        total_size += get_directory_size(entry.path)
                    elif entry.is_file(follow_symlinks=False):
                        total_size += entry.stat(follow_symlinks=False).st_size
                except OSError:
                    continue
    except OSError:
        # Directory missing/unreadable, or the listing broke mid-iteration:
        # fall through and report whatever was counted (0 if nothing was).
        pass
    return total_size
def check_project_activity(project_dir: str, dep_dir: str, seconds_threshold: int) -> Tuple[bool, Optional[str], float]:
    """
    Report whether any file in a project was modified recently.

    The project tree is walked and the newest file modification time is
    recorded, ignoring everything beneath the given dependency directory.

    Args:
        project_dir: Path to the project directory
        dep_dir: Path to the dependency directory to leave out of the scan
        seconds_threshold: Maximum age in seconds that still counts as active

    Returns:
        Tuple of (is_active, latest_file, latest_time). ``latest_file`` is
        None and ``latest_time`` is 0 when no file could be examined.
    """
    now = time.time()
    excluded = os.path.normpath(dep_dir)
    newest_mtime = 0
    newest_path = None

    try:
        for folder, subfolders, filenames in os.walk(project_dir):
            # Prune in place so os.walk never descends into the excluded
            # dependency directory.
            kept = []
            for name in subfolders:
                if os.path.normpath(os.path.join(folder, name)) != excluded:
                    kept.append(name)
            subfolders[:] = kept

            for name in filenames:
                candidate = os.path.join(folder, name)
                try:
                    stamp = os.path.getmtime(candidate)
                except OSError:
                    # File vanished or is unreadable; ignore it.
                    continue
                if stamp > newest_mtime:
                    newest_mtime, newest_path = stamp, candidate
    except OSError as e:
        print(f"Error accessing {project_dir}: {e}")
        return False, None, 0

    return (now - newest_mtime) <= seconds_threshold, newest_path, newest_mtime
def cleanup_dependencies(
    start_dir: str,
    months: int = 3,
    dry_run: bool = True,
    dep_types: Optional[List[DependencyType]] = None,
    respect_project_activity: bool = False
) -> None:
    """
    Find and remove dependency directories that haven't been modified in the specified months.

    A dependency directory is judged stale by its own modification time
    only; its contents are not inspected. Directories matching one of the
    requested types are never descended into during the search.

    Args:
        start_dir: Directory to start the search from
        months: Number of months of inactivity before removing (a month is
            approximated as 30 days)
        dry_run: If True, only print what would be done without actually removing
        dep_types: List of dependency types to clean up (default: every DependencyType)
        respect_project_activity: If True, don't remove deps if project has recent activity

    Returns:
        None. Progress and a per-type summary are printed to stdout.
    """
    if dep_types is None:
        dep_types = list(DependencyType)

    seconds_threshold = months * 30 * 24 * 60 * 60  # Approximate months to seconds
    current_time = time.time()

    # Track statistics for each dependency type
    stats: Dict[DependencyType, Dict[str, int]] = {
        dep_type: {"count": 0, "space": 0} for dep_type in dep_types
    }

    for root, dirs, _ in os.walk(start_dir):
        for dep_type in dep_types:
            dep_name = str(dep_type)
            if dep_name not in dirs:
                continue

            # Prune in place so os.walk does not recurse into the
            # dependency directory itself.
            dirs.remove(dep_name)
            dep_path = os.path.join(root, dep_name)

            try:
                # Just check the directory's own modification time
                latest_time = os.path.getmtime(dep_path)
            except OSError:
                continue

            # Recently touched dependency directories are left alone. Bail
            # out before the project scan below, which walks the whole
            # project tree and is wasted work for a directory that will
            # not be removed or reported anyway.
            if current_time - latest_time <= seconds_threshold:
                continue

            if respect_project_activity:
                project_dir = os.path.dirname(dep_path)
                project_active, project_latest_file, project_latest_time = check_project_activity(
                    project_dir, dep_path, seconds_threshold
                )
                if project_active:
                    print(f"\nSkipping {dep_type} directory (project has recent activity):")
                    print(f"  Path: {dep_path}")
                    print(f"  Most recent project activity: {datetime.fromtimestamp(project_latest_time).strftime('%Y-%m-%d %H:%M:%S')}")
                    if project_latest_file:
                        print(f"  Most recently modified file: {os.path.relpath(project_latest_file, os.path.dirname(dep_path))}")
                    continue

            # Measure before deleting; afterwards there is nothing to measure.
            dep_size = get_directory_size(dep_path)
            last_modified = datetime.fromtimestamp(latest_time).strftime('%Y-%m-%d %H:%M:%S')

            if dry_run:
                print(f"\nWould remove {dep_type} directory:")
            else:
                print(f"\nRemoving {dep_type} directory:")
            print(f"  Path: {dep_path}")
            print(f"  Size: {format_size(dep_size)}")
            print(f"  Last modified: {last_modified}")

            if not dry_run:
                try:
                    shutil.rmtree(dep_path)
                except OSError as e:
                    # A failed removal must not be counted in the summary.
                    print(f"  ✗ Error removing: {e}")
                    continue
                print("  ✓ Removed successfully")

            stats[dep_type]["space"] += dep_size
            stats[dep_type]["count"] += 1

    # Print summary
    print(f"\n{'Dry run summary:' if dry_run else 'Operation complete:'}")
    action = "Would remove" if dry_run else "Removed"
    for dep_type in dep_types:
        count = stats[dep_type]["count"]
        space = stats[dep_type]["space"]
        print(f"{action} {count} {dep_type} directories")
        print(f"{action} {format_size(space)} from {dep_type} directories")
def main() -> None:
    """Parse command-line arguments and run the dependency cleanup.

    Invalid input (a negative ``--months`` value, or a search path that is
    not a directory) is reported through ``parser.error``, which prints the
    usage text plus the message to stderr and exits with status 2, so that
    calling scripts can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Clean up Python virtual environments and Node.js modules that haven't been modified recently"
    )
    parser.add_argument(
        "directory",
        help="Directory to search for dependency directories"
    )
    parser.add_argument(
        "-m", "--months",
        type=int,
        default=3,
        help="Remove directories not modified in this many months (default: 3)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be removed without actually removing anything"
    )
    parser.add_argument(
        "--deps",
        type=DependencyType,
        choices=list(DependencyType),
        nargs="+",
        help="Specify which dependency types to clean up (default: all)"
    )
    parser.add_argument(
        "--respect-project-activity",
        action="store_true",
        help="Don't remove dependencies if their parent project has had recent activity"
    )

    args = parser.parse_args()

    # A negative threshold would make every dependency directory count as
    # stale and be deleted; treat it as a usage error instead.
    if args.months < 0:
        parser.error("--months must not be negative")

    if not os.path.isdir(args.directory):
        parser.error(f"{args.directory} is not a directory")

    cleanup_dependencies(
        args.directory,
        args.months,
        args.dry_run,
        args.deps,
        args.respect_project_activity
    )
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment