Created
May 8, 2026 08:57
-
-
Save greyhoundforty/d328a4af036efdce79e49e44e7e1b9bd to your computer and use it in GitHub Desktop.
Organize Excel Files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| organize_excel.py | |
| Scans a directory of Excel files, extracts a customer name from each | |
| filename, creates a per-customer subfolder in the output directory, | |
| and moves each file into the appropriate folder. | |
| Customer name extraction is controlled by --separator and --position: | |
| - The filename stem (no extension) is split on the separator string | |
| - The segment at --position is used as the customer name | |
| Example filename patterns and the flags to match them: | |
| "Acme Corp_2024-01.xlsx" --separator "_" --position 0 → "Acme Corp" | |
| "2024_Acme Corp_Q1.xlsx" --separator "_" --position 1 → "Acme Corp" | |
| "Acme Corp - Invoice.xlsx" --separator " - " --position 0 → "Acme Corp" | |
| Usage: | |
| python organize_excel.py -i ./raw_files -o ./sorted | |
| python organize_excel.py -i ./raw_files -o ./sorted --separator " - " | |
| python organize_excel.py -i ./raw_files -o ./sorted --dry-run | |
| """ | |
| import argparse | |
| import shutil | |
| import sys | |
| from pathlib import Path | |
| # ── Helpers ──────────────────────────────────────────────────────────────────── | |
| def extract_customer_name(stem: str, separator: str, position: int) -> str | None: | |
| """ | |
| Split the filename stem (no extension) on `separator` and return the | |
| segment at `position` as the customer name. | |
| Returns None when the filename doesn't contain enough separator | |
| occurrences to reach the requested position — which lets the caller | |
| log a useful skip message instead of crashing. | |
| Args: | |
| stem: Filename without extension, e.g. "Acme Corp_2024-01" | |
| separator: String to split on, e.g. "_" or " - " | |
| position: Zero-based index of the customer name segment. | |
| Returns: | |
| Stripped customer name string, or None if not enough parts. | |
| """ | |
| parts = stem.split(separator) | |
| if position >= len(parts): | |
| return None # Not enough segments — caller will log a skip warning | |
| # Strip surrounding whitespace so "Acme Corp " → "Acme Corp" | |
| return parts[position].strip() | |
def sanitize_folder_name(name: str) -> str:
    """
    Turn a raw customer name into a string that is safe to use as a single
    directory name on Windows (the most restrictive common OS).

    The following rules are applied:
      - Surrounding whitespace is removed.
      - Each illegal character  \\ / : * ? " < > |  and each ASCII control
        character (code points 0-31) is replaced with an underscore.
      - Trailing dots and spaces are removed, because Windows does not
        allow a directory name to end with either.
      - If nothing is left (the input was empty, or consisted only of dots
        such as "." or ".."), "_" is returned. This keeps the result from
        ever being a relative-path component that would resolve to the
        output root or its parent.

    Args:
        name: Raw customer name string.

    Returns:
        A non-empty, safe-to-use folder name string.
    """
    illegal_chars = r'\/:*?"<>|'

    # Build one translation table so every replacement happens in one pass.
    replacements = {ord(char): "_" for char in illegal_chars}
    replacements.update({code: "_" for code in range(32)})

    # Strip first so leading/trailing whitespace is dropped rather than
    # being turned into underscores by the control-character mapping.
    cleaned = name.strip().translate(replacements)

    # "Acme Inc." → "Acme Inc"; ".." → "" (handled by the fallback below)
    cleaned = cleaned.rstrip(". ")

    return cleaned or "_"
def collect_excel_files(input_dir: Path) -> list[Path]:
    """
    Return all Excel files in `input_dir` (top-level only, non-recursive).

    Recognises the four most common Excel extensions (case-insensitive):
        .xlsx - Excel workbook (modern, default)
        .xls  - Legacy Excel 97-2003 format
        .xlsm - Macro-enabled workbook
        .xlsb - Binary workbook (large files)

    Files whose name starts with "~$" are ignored: Excel creates these
    lock/owner files next to a workbook while it is open, and they are
    not real workbooks even though they carry an Excel extension.

    Args:
        input_dir: Directory to scan.

    Returns:
        Sorted list of Path objects for matched files. Directories are
        never returned, even if their name ends in an Excel extension.
    """
    excel_extensions = {".xlsx", ".xls", ".xlsm", ".xlsb"}
    return sorted(
        entry
        for entry in input_dir.iterdir()
        if entry.is_file()
        and entry.suffix.lower() in excel_extensions
        and not entry.name.startswith("~$")  # skip Excel lock files
    )
| # ── Core logic ───────────────────────────────────────────────────────────────── | |
def organize_files(
    input_dir: Path,
    output_dir: Path,
    separator: str,
    position: int,
    dry_run: bool,
) -> None:
    """
    Main orchestration: walk input_dir, group files by customer, and move them.

    For each Excel file found:
        1. Extract customer name from the filename using separator + position
        2. Sanitize the name so it's safe to use as a directory name
        3. Skip the file if its destination already exists (never overwrite)
        4. Create the customer folder under output_dir (if it doesn't exist)
        5. Move the file into that folder

    When dry_run=True every step is printed but no files or directories
    are actually created or moved — useful for verifying your separator
    and position settings before committing. The dry run applies the same
    skip rules as a real run, so both report the same counts.

    A file that cannot be moved because of an OS-level error (for example
    it is locked by another program) is reported and counted as skipped;
    the remaining files are still processed.

    Args:
        input_dir: Source directory containing Excel files.
        output_dir: Root destination; customer folders are created inside.
        separator: String used to split filenames into segments.
        position: Which segment (0-based) holds the customer name.
        dry_run: If True, only print actions without executing them.
    """
    excel_files = collect_excel_files(input_dir)
    if not excel_files:
        print(f"No Excel files found in: {input_dir}")
        return

    print(f"Found {len(excel_files)} Excel file(s) in '{input_dir}'")
    if dry_run:
        print("── DRY RUN — no files will be moved ──\n")

    # Counters for the summary line printed at the end
    moved = 0
    skipped = 0

    # Number of segments a filename needs for `position` to be reachable
    # (for a negative position, that many segments counted from the end).
    parts_needed = position + 1 if position >= 0 else -position

    for file_path in excel_files:
        # stem = filename without the extension → "Acme Corp_2024-01"
        stem = file_path.stem
        customer = extract_customer_name(stem, separator, position)

        # ── Could not parse a customer name → skip this file ──
        if not customer:
            print(f" [SKIP] '{file_path.name}'")
            if customer is None:
                # The requested segment does not exist at all
                print(f" Splitting '{stem}' on '{separator}' gave fewer "
                      f"than {parts_needed} part(s). Try adjusting --separator or --position.")
            else:
                # The segment exists but is blank, e.g. "_2024.xlsx" at position 0
                print(f" Segment {position} of '{stem}' split on '{separator}' is empty. "
                      f"Try adjusting --separator or --position.")
            skipped += 1
            continue

        # Build the destination path
        folder_name = sanitize_folder_name(customer)  # safe for the filesystem

        # ── Guard: never let a customer name address output_dir itself or
        #    its parent, which would move files outside their own folder ──
        if folder_name in {"", ".", ".."}:
            print(f" [SKIP] '{file_path.name}'")
            print(f" Customer name '{customer}' cannot be used as a folder name.")
            skipped += 1
            continue

        dest_dir = output_dir / folder_name
        dest_file = dest_dir / file_path.name

        print(f" {file_path.name}")
        print(f" customer → {customer}")
        print(f" dest → {dest_file}")

        # ── Guard: don't silently overwrite an existing file ──
        # Checked before the dry-run shortcut so the preview counts match
        # what a real run would do.
        if dest_file.exists():
            print(f" [WARN] Destination already exists — skipping to avoid overwrite.")
            skipped += 1
            continue

        # In dry-run mode we stop here — nothing is written to disk
        if dry_run:
            moved += 1
            continue

        try:
            # mkdir(parents=True) creates any missing intermediate directories;
            # exist_ok=True means an already-present folder is not an error.
            dest_dir.mkdir(parents=True, exist_ok=True)
            # shutil.move handles both same-filesystem renames and cross-device copies.
            shutil.move(str(file_path), str(dest_file))
        except OSError as exc:
            # One unmovable file should not abort the rest of the batch
            print(f" [ERROR] Could not move file: {exc}")
            skipped += 1
            continue

        moved += 1

    # ── Final summary ──
    print()
    verb = "Would move" if dry_run else "Moved"
    print(f"Done. {verb} {moved} file(s). Skipped {skipped} file(s).")
| # ── CLI ──────────────────────────────────────────────────────────────────────── | |
def parse_args() -> argparse.Namespace:
    """
    Build the command-line parser and parse the process arguments.

    Options:
        -i / --input   (required) directory holding the Excel files
        -o / --output  (required) root directory for the customer folders
        --separator    string to split filenames on (default "_")
        --position     integer index of the customer segment (default 0)
        --dry-run      flag; preview only, nothing is moved

    Returns:
        Namespace with the attributes `input`, `output`, `separator`,
        `position` and `dry_run`.
    """
    usage_overview = (
        "Organize Excel files into per-customer subdirectories.\n"
        "The customer name is extracted from each filename using a\n"
        "configurable separator and segment position.\n\n"
        "Examples:\n"
        " # Files like 'Acme Corp_2024-01.xlsx' (default settings)\n"
        " python organize_excel.py -i ./raw -o ./sorted\n\n"
        " # Files like 'Acme Corp - Invoice 001.xlsx'\n"
        " python organize_excel.py -i ./raw -o ./sorted --separator ' - '\n\n"
        " # Files like '2024_Acme Corp_Q1.xlsx' (customer is 2nd segment)\n"
        " python organize_excel.py -i ./raw -o ./sorted --position 1\n\n"
        " # Preview without moving anything\n"
        " python organize_excel.py -i ./raw -o ./sorted --dry-run"
    )

    # RawDescriptionHelpFormatter keeps the line breaks of the examples above
    cli = argparse.ArgumentParser(
        prog="organize_excel.py",
        description=usage_overview,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # ── Required directories: identical except for flags and help text ──
    directory_options = (
        (
            ("-i", "--input"),
            "Directory containing the Excel files to organize.",
        ),
        (
            ("-o", "--output"),
            "Root output directory. "
            "A subfolder will be created here for each unique customer name.",
        ),
    )
    for flags, help_text in directory_options:
        cli.add_argument(*flags, required=True, metavar="DIR", help=help_text)

    # ── Filename parsing controls ──
    separator_help = (
        "String that separates the customer name from the rest of the filename. "
        "Default: '_' → 'Acme Corp_2024-01.xlsx' gives customer 'Acme Corp'."
    )
    cli.add_argument("--separator", default="_", metavar="SEP", help=separator_help)

    position_help = (
        "Zero-based index of the segment that contains the customer name "
        "after splitting on --separator. "
        "Default: 0 (first segment)."
    )
    cli.add_argument("--position", type=int, default=0, metavar="N", help=position_help)

    # ── Preview switch ──
    dry_run_help = (
        "Print what would happen without moving any files. "
        "Use this to verify your --separator and --position settings first."
    )
    cli.add_argument("--dry-run", action="store_true", help=dry_run_help)

    return cli.parse_args()
def main() -> None:
    """
    Command-line entry point: parse arguments, validate them, and run the
    organizer.

    Any validation failure prints an "Error: ..." message to stderr and
    exits with status 1. The following are checked before any file is
    touched:
        - --separator must not be empty
        - the input path must exist and be a directory
        - the output path, if it already exists, must be a directory

    The output root is created here unless --dry-run is given, in which
    case nothing is written to disk.
    """
    args = parse_args()

    # ── Validate separator ──
    # Filenames are split with str.split(), which raises ValueError for an
    # empty separator; report that up front instead of crashing mid-run.
    if not args.separator:
        print("Error: --separator must not be empty.", file=sys.stderr)
        sys.exit(1)

    # Resolve ~/ home-directory shortcuts and relative paths to absolute paths
    input_dir = Path(args.input).expanduser().resolve()
    output_dir = Path(args.output).expanduser().resolve()

    # ── Validate input directory ──
    if not input_dir.exists():
        print(f"Error: input directory does not exist: {input_dir}", file=sys.stderr)
        sys.exit(1)
    if not input_dir.is_dir():
        print(f"Error: input path is not a directory: {input_dir}", file=sys.stderr)
        sys.exit(1)

    # ── Validate output path ──
    # mkdir(exist_ok=True) still raises FileExistsError when the path is an
    # existing regular file, so catch that case with a readable message.
    if output_dir.exists() and not output_dir.is_dir():
        print(f"Error: output path is not a directory: {output_dir}", file=sys.stderr)
        sys.exit(1)

    # ── Create output root now (skip in dry-run so nothing is written) ──
    if not args.dry_run:
        output_dir.mkdir(parents=True, exist_ok=True)

    organize_files(
        input_dir=input_dir,
        output_dir=output_dir,
        separator=args.separator,
        position=args.position,
        dry_run=args.dry_run,
    )


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment