Created
March 31, 2025 14:53
-
-
Save bigsnarfdude/f2a69bb58bc37c9fa112bae9e07aa69f to your computer and use it in GitHub Desktop.
prune_worksops.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import os | |
# --- Configuration ---
# Prefixes of directory paths to exclude.
# A file section is dropped when its path starts with one of these prefixes
# (plain string-prefix match, so keep the trailing slash on directories).
# Add or remove prefixes as needed.
EXCLUDE_PREFIXES = (
    'app/assets/',      # Standard assets, often large and regeneratable
    'app/javascript/',  # JS source, might be large, focus is often backend
    'bin/',             # Executable scripts, often standard Rails/Bundler
    'config/webpack/',  # Webpacker config, less critical for core logic
    'db/migrate/',      # Migrations are history, core logic is in models/schema
    'db/pg-init/',      # DB initialization scripts
    'node_modules/',    # Very large, third-party JS dependencies
    'public/assets/',   # Compiled assets (can be large)
    'public/packs/',    # Compiled webpack assets (can be large)
    'spec/',            # Test files
    'test/',            # Test files (alternative name)
    'tmp/',             # Temporary files
    'log/',             # Log files
    'storage/',         # Uploaded files / ActiveStorage
    'vendor/',          # Third-party code, often large
    '.circleci/',       # CI/CD configuration, less about core logic
    # Consider adding others if needed:
    # 'db/schema.rb',   # Schema is useful, but large and generated from migrations/models
    # 'public/icons',   # Image assets
)
# --- End Configuration ---
def prune_repomix_file(input_filename="repomix-output.txt",
                       output_filename="repomix-output-pruned.txt",
                       exclude_prefixes=None):
    """
    Read a repomix output file and write a pruned version of it,
    excluding file sections whose path starts with an excluded prefix.

    Everything up to the first "File: " line that follows the third long
    ("=" * 64) separator line is treated as header and copied unchanged.
    After that, each "File: <path>" line starts a new section; the section
    is dropped when <path> starts with one of the excluded prefixes.

    Args:
        input_filename: Path of the repomix output file to read.
        output_filename: Path of the pruned file to write. Must not be the
            same file as ``input_filename``.
        exclude_prefixes: Optional iterable of path prefixes (or a single
            string). Defaults to the module-level ``EXCLUDE_PREFIXES``.

    On any error (missing input, identical input/output, I/O failure) a
    message is printed and the process exits with status 1.
    """
    if exclude_prefixes is None:
        exclude_prefixes = EXCLUDE_PREFIXES
    if isinstance(exclude_prefixes, str):
        # tuple("spec/") would split the string into single characters.
        exclude_prefixes = (exclude_prefixes,)
    prefixes = tuple(exclude_prefixes)

    major_separator = "=" * 64  # long rule that frames the top-level sections
    separator_count = 0         # file sections only start after the 3rd long rule
    skip_current_file = False
    # A line made only of "=" is held back for one iteration: if it turns out
    # to introduce an excluded "File: " section it must not be written.
    pending_separator = None

    try:
        # Opening the output with 'w' would truncate the input before it is read.
        if os.path.realpath(input_filename) == os.path.realpath(output_filename):
            raise ValueError("input and output must be different files")

        original_size = os.path.getsize(input_filename) if os.path.exists(input_filename) else 0
        print(f"Reading '{input_filename}' ({original_size / 1024:.2f} KB)...")
        print(f"Excluding files starting with: {', '.join(prefixes)}")

        # errors='ignore' is deliberate best-effort: undecodable bytes are dropped.
        with open(input_filename, 'r', encoding='utf-8', errors='ignore') as infile, \
                open(output_filename, 'w', encoding='utf-8') as outfile:
            for line in infile:
                if line.startswith(major_separator):
                    separator_count += 1

                # A "File: " line is a section header only once the header part
                # is over. This also covers the very first section, which must
                # be checked against the exclusions like every other one.
                if separator_count >= 3 and line.startswith("File: "):
                    filepath = line.split("File: ", 1)[1].strip()
                    skip_current_file = filepath.startswith(prefixes)
                    # print(f"{'Skipping' if skip_current_file else 'Keeping'} file: {filepath}")  # Uncomment for verbose feedback

                # The held-back rule shares the fate of the line that follows it.
                if pending_separator is not None:
                    if not skip_current_file:
                        outfile.write(pending_separator)
                    pending_separator = None

                stripped = line.strip()
                if stripped and not stripped.strip("="):
                    pending_separator = line
                elif not skip_current_file:
                    outfile.write(line)

            # A rule on the last line has nothing after it to decide its fate.
            if pending_separator is not None and not skip_current_file:
                outfile.write(pending_separator)

        pruned_size = os.path.getsize(output_filename)
        reduction = original_size - pruned_size
        reduction_percent = (reduction / original_size) * 100 if original_size > 0 else 0
        print("\n--- Pruning Summary ---")
        print(f"Original file size: {original_size / 1024:.2f} KB")
        print(f"Pruned file size: {pruned_size / 1024:.2f} KB")
        print(f"Reduction: {reduction / 1024:.2f} KB ({reduction_percent:.2f}%)")
        print(f"\nPruned file written to: {output_filename}")
    except FileNotFoundError:
        print(f"Error: Input file '{input_filename}' not found.")
        sys.exit(1)
    except Exception as e:
        print(f"An error occurred: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Usage: script [input_file [output_file]]
    # Defaults apply when no command-line arguments are given.
    in_file = "repomix-output.txt"
    out_file = "repomix-output-pruned.txt"
    if len(sys.argv) > 1:
        in_file = sys.argv[1]
        if len(sys.argv) > 2:
            out_file = sys.argv[2]
        else:
            # Create default output name based on input name,
            # e.g. "dump.txt" -> "dump-pruned.txt".
            base, ext = os.path.splitext(in_file)
            out_file = f"{base}-pruned{ext}"
    prune_repomix_file(in_file, out_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment