BowTiedSwan · August 28, 2024 20:51
diff --git a/repo_to_text_local.py b/repo_to_text_local.py
 import os
 from tqdm import tqdm

 def get_readme_content(repo_path):
    """
    Retrieve the content of the repository's README file.

    ``README.md`` is tried first, followed by a few common alternative
    spellings, so repositories that use e.g. ``README.rst`` are still
    described. Bytes that are not valid UTF-8 are replaced instead of
    raising, so a badly encoded README cannot abort the whole export.

    Args:
        repo_path: Path to the root directory of the repository.

    Returns:
        The README text, or the string "README not found." when none of
        the candidate files exists.
    """
    candidates = ("README.md", "readme.md", "Readme.md", "README.rst", "README.txt", "README")
    for name in candidates:
        readme_path = os.path.join(repo_path, name)
        if not os.path.isfile(readme_path):
            continue  # Missing, or a directory that merely shares the name
        # errors='replace' keeps a README with stray non-UTF-8 bytes readable.
        with open(readme_path, 'r', encoding='utf-8', errors='replace') as file:
            return file.read()
    return "README not found."

 def traverse_repo_iteratively(repo_path):
    """
    Build a textual listing of the repository tree.

    The walk uses an explicit stack instead of recursion so that deep
    repositories cannot hit the recursion limit. Entries are processed in
    sorted order so the output is deterministic across runs and platforms.
    Directories are tracked by their resolved (real) path, so a symlink
    that points back into the tree is listed but never descended into a
    second time.

    Args:
        repo_path: Path to the root directory of the repository.

    Returns:
        A string with one line per entry, each of the form
        ``<relative path>/<name>`` (directories get a trailing ``/``).

    Raises:
        OSError: If the root directory itself cannot be listed.
    """
    # Names skipped entirely, whether they are files or directories.
    # (os.listdir never returns names containing '/', so the former
    # '/yarn.lock' and './yarn.lock' entries could not match and are dropped.)
    ignored_names = {
        'venv', '.git', '.idea', 'repototext.py', 'node_modules',
        '.next', '.vscode', 'yarn.lock', '.yarn.lock',
    }
    lines = []
    dirs_to_visit = [(repo_path, "")]
    dirs_visited = {os.path.realpath(repo_path)}

    while dirs_to_visit:
        current_path, relative_path = dirs_to_visit.pop()
        try:
            entries = sorted(os.listdir(current_path))
        except OSError:
            if current_path == repo_path:
                raise  # An unusable root must still be reported to the caller
            continue  # Unreadable sub-directory: keep the rest of the listing

        subdirs = []
        for entry in tqdm(entries, desc=f"Processing {relative_path}", leave=False):
            if entry in ignored_names:
                continue
            full_path = os.path.join(current_path, entry)
            if os.path.isdir(full_path):
                lines.append(f"{relative_path}/{entry}/\n")
                real_path = os.path.realpath(full_path)
                if real_path not in dirs_visited:
                    dirs_visited.add(real_path)
                    subdirs.append((full_path, f"{relative_path}/{entry}"))
            elif entry != '.env':  # The '.env' file is never listed
                lines.append(f"{relative_path}/{entry}\n")
        # Push in reverse so the stack pops sub-directories alphabetically.
        dirs_to_visit.extend(reversed(subdirs))
    return "".join(lines)

 def get_file_contents_iteratively(repo_path):
    """
    Collect the text content of every file in the repository.

    The walk uses an explicit stack instead of recursion. Entries are
    processed in sorted order so the output is deterministic. Directories
    are tracked by their resolved (real) path so symlink cycles terminate.
    Files whose name ends with a known binary/uninteresting suffix
    (compared case-insensitively) are reported as skipped, as are files
    that cannot be decoded as UTF-8 or cannot be read at all.

    Args:
        repo_path: Path to the root directory of the repository.

    Returns:
        A string made of ``File: <relative path>`` headers, each followed by
        either the file content or a one-line reason why it was skipped.

    Raises:
        OSError: If the root directory itself cannot be listed.
    """
    # Same names that the structure listing skips, so that nothing hidden
    # from the tree (e.g. '.idea') has its contents dumped here.
    ignored_names = {
        'venv', '.git', '.idea', 'repototext.py', 'node_modules',
        '.next', '.vscode', 'yarn.lock', '.yarn.lock',
    }
    binary_extensions = [
        # Compiled executables and libraries
        '.exe', '.dll', '.so', '.a', '.lib', '.dylib', '.o', '.obj',
        # Compressed archives
        '.zip', '.tar', '.tar.gz', '.tgz', '.rar', '.7z', '.bz2', '.gz', '.xz', '.z', '.lz', '.lzma', '.lzo', '.rz', '.sz', '.dz',
        # Application-specific files
        '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp',
        # Media files (less common)
        '.png', '.jpg', '.jpeg', '.gif', '.mp3', '.mp4', '.wav', '.flac', '.ogg', '.avi', '.mkv', '.mov', '.webm', '.wmv', '.m4a', '.aac',
        # Virtual machine and container images
        '.iso', '.vmdk', '.qcow2', '.vdi', '.vhd', '.vhdx', '.ova', '.ovf',
        # Database files
        '.db', '.sqlite', '.mdb', '.accdb', '.frm', '.ibd', '.dbf',
        # Java-related files
        '.jar', '.class', '.war', '.ear', '.jpi',
        # Python bytecode and packages
        '.pyc', '.pyo', '.pyd', '.egg', '.whl',
        # Other potentially important extensions
        '.deb', '.rpm', '.apk', '.msi', '.dmg', '.pkg', '.bin', '.dat', '.data',
        '.dump', '.img', '.toast', '.vcd', '.crx', '.xpi', '.lockb', 'package-lock.json', '.svg',
        '.eot', '.otf', '.ttf', '.woff', '.woff2',
        '.ico', '.icns', '.cur',
        '.cab', '.dmp', '.msp', '.msm',
        '.keystore', '.jks', '.truststore', '.cer', '.crt', '.der', '.p7b', '.p7c', '.p12', '.pfx', '.pem', '.csr',
        '.key', '.pub', '.sig', '.pgp', '.gpg',
        '.nupkg', '.snupkg', '.appx', '.msix', '.msp', '.msu',
        '.deb', '.rpm', '.snap', '.flatpak', '.appimage',
        '.ko', '.sys', '.elf',
        '.swf', '.fla', '.swc',
        '.rlib', '.pdb', '.idb', '.pdb', '.dbg',
        '.sdf', '.bak', '.tmp', '.temp', '.log', '.tlog', '.ilk',
        '.bpl', '.dcu', '.dcp', '.dcpil', '.drc',
        '.aps', '.res', '.rsrc', '.rc', '.resx',
        '.prefs', '.properties', '.ini', '.cfg', '.config', '.conf',
        '.DS_Store', '.localized', '.svn', '.git', '.gitignore', '.gitkeep',
    ]
    # Lower-cased once; str.endswith accepts a tuple, so 'IMAGE.PNG' is
    # matched with a single call per file.
    skipped_suffixes = tuple(ext.lower() for ext in binary_extensions)

    chunks = []
    dirs_to_visit = [(repo_path, "")]
    dirs_visited = {os.path.realpath(repo_path)}

    while dirs_to_visit:
        current_path, relative_path = dirs_to_visit.pop()
        try:
            entries = sorted(os.listdir(current_path))
        except OSError:
            if current_path == repo_path:
                raise  # An unusable root must still be reported to the caller
            continue  # Unreadable sub-directory: keep exporting the rest

        subdirs = []
        for entry in tqdm(entries, desc=f"Downloading {relative_path}", leave=False):
            if entry in ignored_names:
                continue
            full_path = os.path.join(current_path, entry)
            display_path = f"{relative_path}/{entry}"
            if os.path.isdir(full_path):
                real_path = os.path.realpath(full_path)
                if real_path not in dirs_visited:
                    dirs_visited.add(real_path)
                    subdirs.append((full_path, display_path))
                continue
            if entry == '.env':
                continue  # Never export the '.env' file
            if entry.lower().endswith(skipped_suffixes):
                chunks.append(f"File: {display_path}\nContent: Skipped binary file\n\n")
                continue
            chunks.append(f"File: {display_path}\n")
            try:
                with open(full_path, 'r', encoding='utf-8') as file:
                    chunks.append("Content:\n" + file.read() + "\n\n")
            except UnicodeDecodeError:
                chunks.append("Content: Skipped due to unsupported encoding or binary content\n\n")
            except OSError as error:
                # Broken symlink, permission problem, etc.: note it and go on.
                chunks.append(f"Content: Skipped, file could not be read ({error})\n\n")
        # Push in reverse so the stack pops sub-directories alphabetically.
        dirs_to_visit.extend(reversed(subdirs))
    return "".join(chunks)

 def get_local_repo_contents(repo_path):
    """
    Main function to get local repository contents.

    Gathers the README, the directory listing and the file contents of the
    repository and builds the analysis prompt that precedes them.

    Args:
        repo_path: Path to the root directory of the repository. May be
            relative (e.g. ".") and may carry a trailing separator.

    Returns:
        A 5-tuple ``(repo_name, instructions, readme_content,
        repo_structure, file_contents)`` of strings.
    """
    # abspath() also normalises the path, so ".", ".." and trailing
    # separators ('/' or os.sep) resolve to the real directory name.
    # The filesystem root has no base name, hence the fallback.
    repo_name = os.path.basename(os.path.abspath(repo_path)) or "repository"

    print(f"Fetching README for: {repo_name}")
    readme_content = get_readme_content(repo_path)

    print(f"\nFetching repository structure for: {repo_name}")
    repo_structure = f"Repository Structure: {repo_name}\n" + traverse_repo_iteratively(repo_path)

    print(f"\nFetching file contents for: {repo_name}")
    file_contents = get_file_contents_iteratively(repo_path)

    instructions = "".join((
        f"Prompt: Analyze the {repo_name} repository to understand its structure, purpose, and functionality. Follow these steps to study the codebase:\n\n",
        "1. Read the README file to gain an overview of the project, its goals, and any setup instructions.\n\n",
        "2. Examine the repository structure to understand how the files and directories are organized.\n\n",
        "3. Identify the main entry point of the application (e.g., main.py, app.py, index.js) and start analyzing the code flow from there.\n\n",
        "4. Study the dependencies and libraries used in the project to understand the external tools and frameworks being utilized.\n\n",
        "5. Analyze the core functionality of the project by examining the key modules, classes, and functions.\n\n",
        "6. Look for any configuration files (e.g., config.py, .env) to understand how the project is configured and what settings are available.\n\n",
        "7. Investigate any tests or test directories to see how the project ensures code quality and handles different scenarios.\n\n",
        "8. Review any documentation or inline comments to gather insights into the codebase and its intended behavior.\n\n",
        "9. Identify any potential areas for improvement, optimization, or further exploration based on your analysis.\n\n",
        "10. Provide a summary of your findings, including the project's purpose, key features, and any notable observations or recommendations.\n\n",
        "Use the files and contents provided below to complete this analysis:\n\n",
    ))

    return repo_name, instructions, readme_content, repo_structure, file_contents

 if __name__ == '__main__':
    # Script entry point: ask for a repository path and write the export
    # to '<repo_name>_contents.txt' in the current working directory.
    raw_path = input("Please enter the path to the local repository: ")
    # Pasted or drag-and-dropped paths often carry stray whitespace or
    # surrounding quotes; '~' is not expanded by open()/os.listdir().
    repo_path = os.path.expanduser(raw_path.strip().strip('"\''))
    try:
        repo_name, instructions, readme_content, repo_structure, file_contents = get_local_repo_contents(repo_path)
        output_filename = f'{repo_name}_contents.txt'
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(instructions)
            f.write(f"README:\n{readme_content}\n\n")
            f.write(repo_structure)
            f.write('\n\n')
            f.write(file_contents)
        print(f"Local repository contents saved to '{output_filename}'.")
    except Exception as e:
        # Top-level boundary: report the problem and signal failure through
        # the exit status instead of exiting with 0.
        print(f"An error occurred: {e}")
        print("Please check the path to the local repository and try again.")
        raise SystemExit(1)
	import os
	from tqdm import tqdm

	def get_readme_content(repo_path):
	    """
	    Retrieve the content of the repository's README file.

	    ``README.md`` is tried first, followed by a few common alternative
	    spellings, so repositories that use e.g. ``README.rst`` are still
	    described. Bytes that are not valid UTF-8 are replaced instead of
	    raising, so a badly encoded README cannot abort the whole export.

	    Args:
	        repo_path: Path to the root directory of the repository.

	    Returns:
	        The README text, or the string "README not found." when none of
	        the candidate files exists.
	    """
	    candidates = ("README.md", "readme.md", "Readme.md", "README.rst", "README.txt", "README")
	    for name in candidates:
	        readme_path = os.path.join(repo_path, name)
	        if not os.path.isfile(readme_path):
	            continue  # Missing, or a directory that merely shares the name
	        # errors='replace' keeps a README with stray non-UTF-8 bytes readable.
	        with open(readme_path, 'r', encoding='utf-8', errors='replace') as file:
	            return file.read()
	    return "README not found."

	def traverse_repo_iteratively(repo_path):
	    """
	    Build a textual listing of the repository tree.

	    The walk uses an explicit stack instead of recursion so that deep
	    repositories cannot hit the recursion limit. Entries are processed in
	    sorted order so the output is deterministic across runs and platforms.
	    Directories are tracked by their resolved (real) path, so a symlink
	    that points back into the tree is listed but never descended into a
	    second time.

	    Args:
	        repo_path: Path to the root directory of the repository.

	    Returns:
	        A string with one line per entry, each of the form
	        ``<relative path>/<name>`` (directories get a trailing ``/``).

	    Raises:
	        OSError: If the root directory itself cannot be listed.
	    """
	    # Names skipped entirely, whether they are files or directories.
	    # (os.listdir never returns names containing '/', so the former
	    # '/yarn.lock' and './yarn.lock' entries could not match and are dropped.)
	    ignored_names = {
	        'venv', '.git', '.idea', 'repototext.py', 'node_modules',
	        '.next', '.vscode', 'yarn.lock', '.yarn.lock',
	    }
	    lines = []
	    dirs_to_visit = [(repo_path, "")]
	    dirs_visited = {os.path.realpath(repo_path)}

	    while dirs_to_visit:
	        current_path, relative_path = dirs_to_visit.pop()
	        try:
	            entries = sorted(os.listdir(current_path))
	        except OSError:
	            if current_path == repo_path:
	                raise  # An unusable root must still be reported to the caller
	            continue  # Unreadable sub-directory: keep the rest of the listing

	        subdirs = []
	        for entry in tqdm(entries, desc=f"Processing {relative_path}", leave=False):
	            if entry in ignored_names:
	                continue
	            full_path = os.path.join(current_path, entry)
	            if os.path.isdir(full_path):
	                lines.append(f"{relative_path}/{entry}/\n")
	                real_path = os.path.realpath(full_path)
	                if real_path not in dirs_visited:
	                    dirs_visited.add(real_path)
	                    subdirs.append((full_path, f"{relative_path}/{entry}"))
	            elif entry != '.env':  # The '.env' file is never listed
	                lines.append(f"{relative_path}/{entry}\n")
	        # Push in reverse so the stack pops sub-directories alphabetically.
	        dirs_to_visit.extend(reversed(subdirs))
	    return "".join(lines)

	def get_file_contents_iteratively(repo_path):
	    """
	    Collect the text content of every file in the repository.

	    The walk uses an explicit stack instead of recursion. Entries are
	    processed in sorted order so the output is deterministic. Directories
	    are tracked by their resolved (real) path so symlink cycles terminate.
	    Files whose name ends with a known binary/uninteresting suffix
	    (compared case-insensitively) are reported as skipped, as are files
	    that cannot be decoded as UTF-8 or cannot be read at all.

	    Args:
	        repo_path: Path to the root directory of the repository.

	    Returns:
	        A string made of ``File: <relative path>`` headers, each followed by
	        either the file content or a one-line reason why it was skipped.

	    Raises:
	        OSError: If the root directory itself cannot be listed.
	    """
	    # Same names that the structure listing skips, so that nothing hidden
	    # from the tree (e.g. '.idea') has its contents dumped here.
	    ignored_names = {
	        'venv', '.git', '.idea', 'repototext.py', 'node_modules',
	        '.next', '.vscode', 'yarn.lock', '.yarn.lock',
	    }
	    binary_extensions = [
	        # Compiled executables and libraries
	        '.exe', '.dll', '.so', '.a', '.lib', '.dylib', '.o', '.obj',
	        # Compressed archives
	        '.zip', '.tar', '.tar.gz', '.tgz', '.rar', '.7z', '.bz2', '.gz', '.xz', '.z', '.lz', '.lzma', '.lzo', '.rz', '.sz', '.dz',
	        # Application-specific files
	        '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', '.odp',
	        # Media files (less common)
	        '.png', '.jpg', '.jpeg', '.gif', '.mp3', '.mp4', '.wav', '.flac', '.ogg', '.avi', '.mkv', '.mov', '.webm', '.wmv', '.m4a', '.aac',
	        # Virtual machine and container images
	        '.iso', '.vmdk', '.qcow2', '.vdi', '.vhd', '.vhdx', '.ova', '.ovf',
	        # Database files
	        '.db', '.sqlite', '.mdb', '.accdb', '.frm', '.ibd', '.dbf',
	        # Java-related files
	        '.jar', '.class', '.war', '.ear', '.jpi',
	        # Python bytecode and packages
	        '.pyc', '.pyo', '.pyd', '.egg', '.whl',
	        # Other potentially important extensions
	        '.deb', '.rpm', '.apk', '.msi', '.dmg', '.pkg', '.bin', '.dat', '.data',
	        '.dump', '.img', '.toast', '.vcd', '.crx', '.xpi', '.lockb', 'package-lock.json', '.svg',
	        '.eot', '.otf', '.ttf', '.woff', '.woff2',
	        '.ico', '.icns', '.cur',
	        '.cab', '.dmp', '.msp', '.msm',
	        '.keystore', '.jks', '.truststore', '.cer', '.crt', '.der', '.p7b', '.p7c', '.p12', '.pfx', '.pem', '.csr',
	        '.key', '.pub', '.sig', '.pgp', '.gpg',
	        '.nupkg', '.snupkg', '.appx', '.msix', '.msp', '.msu',
	        '.deb', '.rpm', '.snap', '.flatpak', '.appimage',
	        '.ko', '.sys', '.elf',
	        '.swf', '.fla', '.swc',
	        '.rlib', '.pdb', '.idb', '.pdb', '.dbg',
	        '.sdf', '.bak', '.tmp', '.temp', '.log', '.tlog', '.ilk',
	        '.bpl', '.dcu', '.dcp', '.dcpil', '.drc',
	        '.aps', '.res', '.rsrc', '.rc', '.resx',
	        '.prefs', '.properties', '.ini', '.cfg', '.config', '.conf',
	        '.DS_Store', '.localized', '.svn', '.git', '.gitignore', '.gitkeep',
	    ]
	    # Lower-cased once; str.endswith accepts a tuple, so 'IMAGE.PNG' is
	    # matched with a single call per file.
	    skipped_suffixes = tuple(ext.lower() for ext in binary_extensions)

	    chunks = []
	    dirs_to_visit = [(repo_path, "")]
	    dirs_visited = {os.path.realpath(repo_path)}

	    while dirs_to_visit:
	        current_path, relative_path = dirs_to_visit.pop()
	        try:
	            entries = sorted(os.listdir(current_path))
	        except OSError:
	            if current_path == repo_path:
	                raise  # An unusable root must still be reported to the caller
	            continue  # Unreadable sub-directory: keep exporting the rest

	        subdirs = []
	        for entry in tqdm(entries, desc=f"Downloading {relative_path}", leave=False):
	            if entry in ignored_names:
	                continue
	            full_path = os.path.join(current_path, entry)
	            display_path = f"{relative_path}/{entry}"
	            if os.path.isdir(full_path):
	                real_path = os.path.realpath(full_path)
	                if real_path not in dirs_visited:
	                    dirs_visited.add(real_path)
	                    subdirs.append((full_path, display_path))
	                continue
	            if entry == '.env':
	                continue  # Never export the '.env' file
	            if entry.lower().endswith(skipped_suffixes):
	                chunks.append(f"File: {display_path}\nContent: Skipped binary file\n\n")
	                continue
	            chunks.append(f"File: {display_path}\n")
	            try:
	                with open(full_path, 'r', encoding='utf-8') as file:
	                    chunks.append("Content:\n" + file.read() + "\n\n")
	            except UnicodeDecodeError:
	                chunks.append("Content: Skipped due to unsupported encoding or binary content\n\n")
	            except OSError as error:
	                # Broken symlink, permission problem, etc.: note it and go on.
	                chunks.append(f"Content: Skipped, file could not be read ({error})\n\n")
	        # Push in reverse so the stack pops sub-directories alphabetically.
	        dirs_to_visit.extend(reversed(subdirs))
	    return "".join(chunks)

	def get_local_repo_contents(repo_path):
	    """
	    Main function to get local repository contents.

	    Gathers the README, the directory listing and the file contents of the
	    repository and builds the analysis prompt that precedes them.

	    Args:
	        repo_path: Path to the root directory of the repository. May be
	            relative (e.g. ".") and may carry a trailing separator.

	    Returns:
	        A 5-tuple ``(repo_name, instructions, readme_content,
	        repo_structure, file_contents)`` of strings.
	    """
	    # abspath() also normalises the path, so ".", ".." and trailing
	    # separators ('/' or os.sep) resolve to the real directory name.
	    # The filesystem root has no base name, hence the fallback.
	    repo_name = os.path.basename(os.path.abspath(repo_path)) or "repository"

	    print(f"Fetching README for: {repo_name}")
	    readme_content = get_readme_content(repo_path)

	    print(f"\nFetching repository structure for: {repo_name}")
	    repo_structure = f"Repository Structure: {repo_name}\n" + traverse_repo_iteratively(repo_path)

	    print(f"\nFetching file contents for: {repo_name}")
	    file_contents = get_file_contents_iteratively(repo_path)

	    instructions = "".join((
	        f"Prompt: Analyze the {repo_name} repository to understand its structure, purpose, and functionality. Follow these steps to study the codebase:\n\n",
	        "1. Read the README file to gain an overview of the project, its goals, and any setup instructions.\n\n",
	        "2. Examine the repository structure to understand how the files and directories are organized.\n\n",
	        "3. Identify the main entry point of the application (e.g., main.py, app.py, index.js) and start analyzing the code flow from there.\n\n",
	        "4. Study the dependencies and libraries used in the project to understand the external tools and frameworks being utilized.\n\n",
	        "5. Analyze the core functionality of the project by examining the key modules, classes, and functions.\n\n",
	        "6. Look for any configuration files (e.g., config.py, .env) to understand how the project is configured and what settings are available.\n\n",
	        "7. Investigate any tests or test directories to see how the project ensures code quality and handles different scenarios.\n\n",
	        "8. Review any documentation or inline comments to gather insights into the codebase and its intended behavior.\n\n",
	        "9. Identify any potential areas for improvement, optimization, or further exploration based on your analysis.\n\n",
	        "10. Provide a summary of your findings, including the project's purpose, key features, and any notable observations or recommendations.\n\n",
	        "Use the files and contents provided below to complete this analysis:\n\n",
	    ))

	    return repo_name, instructions, readme_content, repo_structure, file_contents

	if __name__ == '__main__':
	    # Script entry point: ask for a repository path and write the export
	    # to '<repo_name>_contents.txt' in the current working directory.
	    raw_path = input("Please enter the path to the local repository: ")
	    # Pasted or drag-and-dropped paths often carry stray whitespace or
	    # surrounding quotes; '~' is not expanded by open()/os.listdir().
	    repo_path = os.path.expanduser(raw_path.strip().strip('"\''))
	    try:
	        repo_name, instructions, readme_content, repo_structure, file_contents = get_local_repo_contents(repo_path)
	        output_filename = f'{repo_name}_contents.txt'
	        with open(output_filename, 'w', encoding='utf-8') as f:
	            f.write(instructions)
	            f.write(f"README:\n{readme_content}\n\n")
	            f.write(repo_structure)
	            f.write('\n\n')
	            f.write(file_contents)
	        print(f"Local repository contents saved to '{output_filename}'.")
	    except Exception as e:
	        # Top-level boundary: report the problem and signal failure through
	        # the exit status instead of exiting with 0.
	        print(f"An error occurred: {e}")
	        print("Please check the path to the local repository and try again.")
	        raise SystemExit(1)