Skip to content

Instantly share code, notes, and snippets.

@greyhoundforty
Created April 16, 2025 10:23
Show Gist options
  • Save greyhoundforty/32e1fc32bd2b7b606d76c00e79fd57d5 to your computer and use it in GitHub Desktop.
Save greyhoundforty/32e1fc32bd2b7b606d76c00e79fd57d5 to your computer and use it in GitHub Desktop.
Python Code Directory Scanner

Directory Scanner by Type

A Python utility for scanning directories to identify code projects by file types and Git repository status.

Features

  • Scans directories recursively to a configurable depth
  • Detects programming languages and file types from the extensions of the files located directly inside each scanned directory
  • Identifies Git repositories
  • Presents results in a formatted table using Rich

Requirements

  • Python 3.12
    • rich
    • markdown-it-py
    • mdurl
    • pygments

Usage

Run the script with optional parameters:

  • path: Base directory to scan (default: current directory)
  • --depth, -d: Maximum recursion depth (default: 1)

Command syntax:

    python code_scanner.py [path] [--depth DEPTH]

code_scanner.py

#!/usr/bin/env python3
import os
import argparse
import subprocess
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from rich.console import Console
from rich.table import Table

def is_git_repo(directory: str) -> bool:
    """Report whether git considers *directory* to be inside a work tree.

    Runs ``git -C <directory> rev-parse --is-inside-work-tree`` and treats a
    zero exit status as a positive answer. If the command cannot be run at
    all (any exception is raised while launching it), the answer is ``False``.
    """
    command = ["git", "-C", directory, "rev-parse", "--is-inside-work-tree"]
    try:
        # Output is captured only so git's messages stay off the terminal;
        # the exit status alone decides the result.
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception:
        return False
    return completed.returncode == 0

def detect_code_type(directory: str) -> List[str]:
    """Detect code types in a directory based on file extensions.

    Only files located directly inside ``directory`` are inspected;
    subdirectories are not descended into. Extensions are compared
    case-insensitively, so ``APP.PY`` and ``app.py`` are both reported as
    Python.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        The alphabetically sorted, de-duplicated names of the recognised code
        types. The list is empty when nothing is recognised or when the
        directory cannot be read.
    """
    file_type_mapping = {
        '.tf': 'Terraform',
        '.py': 'Python',
        '.js': 'JavaScript',
        '.ts': 'TypeScript',
        '.jsx': 'React',
        '.tsx': 'React/TypeScript',
        '.go': 'Go',
        '.java': 'Java',
        '.cs': 'C#',
        '.cpp': 'C++',
        '.c': 'C',
        '.php': 'PHP',
        '.rb': 'Ruby',
        '.rs': 'Rust',
        '.swift': 'Swift',
        '.kt': 'Kotlin',
        '.scala': 'Scala',
        '.sh': 'Shell',
        '.json': 'JSON',
        '.yml': 'YAML',
        '.yaml': 'YAML',
        '.xml': 'XML',
        '.html': 'HTML',
        '.css': 'CSS',
        '.scss': 'SCSS',
        '.md': 'Markdown',
    }

    code_types = set()
    try:
        # The first os.walk() result describes `directory` itself, which is
        # all this scan needs. os.walk() yields nothing for an unreadable or
        # missing directory, hence the empty default.
        _, _, files = next(os.walk(directory), (directory, [], []))
        for file in files:
            # Normalise case so upper- or mixed-case extensions still match.
            ext = os.path.splitext(file)[1].lower()
            if ext in file_type_mapping:
                code_types.add(file_type_mapping[ext])
    except Exception as e:
        # Best-effort scan: report the problem and return what was found.
        print(f"Error scanning directory {directory}: {e}")

    return sorted(code_types)

def scan_directories(base_path: str, max_depth: int = 1) -> List[Tuple[str, List[str], bool]]:
    """
    Scan the subdirectories of ``base_path`` up to a specified depth.

    The immediate children of ``base_path`` are depth 1. Directories whose
    name starts with a dot are skipped and never descended into. A directory
    is reported only when at least one code type is detected in it, but it is
    still descended into (depth permitting) when none is detected.

    Args:
        base_path: Directory whose subdirectories are scanned. It is made
            absolute before scanning; the base directory itself is never
            reported.
        max_depth: Deepest level of subdirectories to inspect. Values below 1
            produce an empty result.

    Returns:
        A list of tuples ``(directory_path, code_types, is_git_repo)`` in
        pre-order, with sibling directories sorted by path so the output is
        deterministic.
    """
    results = []
    base_path = os.path.abspath(base_path)

    def scan_recursive(current_path: str, current_depth: int):
        if current_depth > max_depth:
            return

        try:
            # Collect the visible immediate subdirectories. The listing is
            # sorted because os.scandir() yields entries in arbitrary order.
            with os.scandir(current_path) as entries:
                subdirs = sorted(
                    entry.path
                    for entry in entries
                    if entry.is_dir() and not entry.name.startswith('.')
                )
        except OSError as e:
            print(f"Error accessing {current_path}: {e}")
            return

        for dir_path in subdirs:
            code_types = detect_code_type(dir_path)
            if code_types:  # Only include directories with detected code
                # The git check launches a subprocess, so run it only for
                # directories that are actually reported.
                results.append((dir_path, code_types, is_git_repo(dir_path)))

            # Recurse into subdirectories if needed
            if current_depth < max_depth:
                scan_recursive(dir_path, current_depth + 1)

    scan_recursive(base_path, 1)
    return results

def display_results(results: List[Tuple[str, List[str], bool]], base_path: str):
    """Print the scan results as a Rich table.

    Each result becomes one row holding the directory (shown relative to
    ``base_path`` when a relative path can be computed), the comma-separated
    code types, and a check or cross mark for the git status.
    """
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Directory", style="dim")
    table.add_column("Code Types")
    table.add_column("Git Repository", justify="center")

    for location, languages, tracked in results:
        # os.path.relpath raises ValueError when no relative path exists;
        # fall back to the path as given.
        try:
            shown_path = os.path.relpath(location, base_path)
        except ValueError:
            shown_path = location

        if languages:
            language_cell = ", ".join(languages)
        else:
            language_cell = "None"

        git_cell = "✓" if tracked else "✗"
        table.add_row(shown_path, language_cell, git_cell)

    Console().print(table)


def main():
    """Command-line entry point: parse arguments, scan, and report.

    Reads an optional positional ``path`` and a ``--depth``/``-d`` option,
    scans the directories below that path, and prints either a results table
    with a count or a notice that nothing was found.
    """
    parser = argparse.ArgumentParser(
        description='Scan directories for code types and git repositories.'
    )
    parser.add_argument(
        'path',
        nargs='?',
        default='.',
        help='Base directory to scan (default: current directory)',
    )
    parser.add_argument(
        '--depth', '-d',
        type=int,
        default=1,
        help='Maximum recursion depth (default: 1)',
    )
    options = parser.parse_args()

    console = Console()
    scan_root = os.path.abspath(options.path)
    console.print(f"[bold blue]Scanning directories in {scan_root} with max depth {options.depth}...[/bold blue]")

    found = scan_directories(options.path, options.depth)

    if found:
        display_results(found, options.path)
        console.print(f"[green]Found {len(found)} code directories.[/green]")
    else:
        console.print("[yellow]No code directories found![/yellow]")

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment