Skip to content

Instantly share code, notes, and snippets.

@andrewnc
Created September 19, 2024 15:20
Show Gist options
  • Save andrewnc/9ec74403d83e3f742734000f8f7ff774 to your computer and use it in GitHub Desktop.
Save andrewnc/9ec74403d83e3f742734000f8f7ff774 to your computer and use it in GitHub Desktop.
uv run git_to_md.py
import os
import sys
import subprocess
import tempfile
import shutil
# Lower-case extensions of the files that get converted. Matching is
# case-insensitive because each file's extension is lower-cased before lookup.
_CODE_EXTENSIONS = frozenset({
    '.py', '.c', '.cpp', '.ipynb', '.java', '.js', '.rb', '.go', '.cs',
    '.php', '.html', '.css', '.swift', '.kt', '.m', '.mm', '.ts', '.rs',
    '.scala', '.pl', '.sh', '.bat', '.ps1', '.lua', '.erl', '.ex', '.dart',
    '.r', '.jl', '.md',
})


def _fence_for(content):
    """Return the shortest backtick fence (>= 3) that does not occur in *content*."""
    fence = '```'
    # A fence that appears inside the content would close the code block early
    # (typical for Markdown files that contain their own fenced blocks).
    while fence in content:
        fence += '`'
    return fence


def _unique_output_path(output_dir, stem):
    """Return an unused ``<stem>.md`` or ``<stem>_<n>.md`` path in *output_dir*."""
    candidate = os.path.join(output_dir, stem + '.md')
    count = 1
    while os.path.exists(candidate):
        candidate = os.path.join(output_dir, f"{stem}_{count}.md")
        count += 1
    return candidate


def main():
    """Clone a git repository and dump its source files as Markdown.

    Reads the repository URL from ``sys.argv[1]``, clones it into a temporary
    directory, and writes one ``.md`` file per recognised source file into
    ``output_folder`` (created in the current working directory). Each output
    file holds a heading with the original file name followed by the file's
    content in a fenced code block. Name clashes are resolved by appending
    ``_<n>`` to the base name.

    Exits with status 1 when the argument count is wrong or the clone fails.
    """
    if len(sys.argv) != 2:
        print("Usage: python script.py <github_repo_url>")
        sys.exit(1)
    repo_url = sys.argv[1]
    output_dir = 'output_folder'

    # The context manager removes the clone even when we exit early or fail.
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            # '--quiet' hides progress output while git's own error messages
            # still reach stderr; '--' stops a URL that begins with '-' from
            # being parsed as an option.
            subprocess.check_call(
                ['git', 'clone', '--quiet', '--', repo_url, temp_dir],
                stdout=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable git binary.
            print(f"Error cloning repository: {e}")
            sys.exit(1)

        os.makedirs(output_dir, exist_ok=True)

        for root, dirs, files in os.walk(temp_dir):
            # Prune in place so os.walk never descends into git metadata.
            dirs[:] = [d for d in dirs if d != '.git']
            for file in files:
                filename, file_extension = os.path.splitext(file)
                if file_extension.lower() not in _CODE_EXTENSIONS:
                    continue

                file_path = os.path.join(root, file)
                try:
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()
                except OSError as e:
                    # E.g. a dangling symlink: skip it rather than abort the run.
                    rel_path = os.path.relpath(file_path, temp_dir)
                    print(f"Skipping unreadable file '{rel_path}': {e}", file=sys.stderr)
                    continue

                fence = _fence_for(content)
                md_content = f'# {file}\n\n{fence}{file_extension[1:]}\n{content}\n{fence}'

                output_file = _unique_output_path(output_dir, filename)
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(md_content)

        print(f"Markdown files have been saved in the folder '{output_dir}'.")
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment