Last active
March 21, 2025 16:48
-
-
Save mw3i/35dc2ac191130d4c49f97fbebbc142f1 to your computer and use it in GitHub Desktop.
200-line bash script to talk to an LLM prompted with your repo structure + any READMEs it finds (leveraging python and openai)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # # # # # #
# SETTINGS
# # # # # # # # # # # # # #
# Positional arguments override these defaults (see show_help below).
TARGET_DIR="${1:-./}"              # Directory to analyze
API_KEY="${2:-$OPENAI_API_KEY}"    # OpenAI API key (falls back to environment)
MODEL="${3:-gpt-4}"                # LLM model name
MAX_DEPTH="${4:-3}"                # Max depth for the directory scan
FILE_SIZE_LIMIT_MB="${5:-8}"       # File size limit (MB) for README processing
# # # # # # # # # # # # # # # # # # # # # # # # # #
# ABOUT
# # # # # # # # # # # # # #
# Print usage information to stdout.
show_help() {
    cat << EOF
Usage: $(basename "$0") [OPTIONS] [TARGET_DIR] [API_KEY] [MODEL] [MAX_DEPTH] [FILE_SIZE_LIMIT_MB]

A single bash script that runs an interactive shell with an LLM prompted on the README files
and file structure of a specified folder (default: ./).

Options:
  -h, --help            Show this help message and exit.

Arguments: (you can set defaults at the top of this script)
  TARGET_DIR            The directory to analyze (default: ./)
  API_KEY               OpenAI API key (default: reads from environment)
  MODEL                 LLM model to use (default: gpt-4)
  MAX_DEPTH             Depth for scanning the directory (default: 3)
  FILE_SIZE_LIMIT_MB    Max file size for processing (default: 8MB)

Example:
  $(basename "$0") /path/to/project \$OPENAI_API_KEY gpt-4 3 8
EOF
}

# Check for --help flag
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
    show_help
    exit 0
fi
# # # # # # # # # # # # # # # # # # # # # # # # # #
# PROGRAM
# # # # # # # # # # # # # #
echo "--- README-CHAT ---"
echo "[START]"

# CAPTURE TARGET DIRECTORY STRUCTURE
# -------------------------------------
# Build JSON arrays of file paths for the LLM prompt.
# NOTE(review): -printf is GNU find; jq must be installed — neither is
# checked for here, and both are required for this to work.
echo "Saving target directory structure with max depth: $MAX_DEPTH"
TREE_OUTPUT=$(find "$TARGET_DIR" -mindepth 1 -maxdepth "$MAX_DEPTH" -type f -printf "%p\n" | jq -R -s -c 'split("\n")[:-1]')
README_FILES=$(find "$TARGET_DIR" -mindepth 1 -maxdepth "$MAX_DEPTH" -type f \( -iname "README.md" -o -iname "README.txt" -o -iname "README" \) -printf "%p\n" | jq -R -s -c 'split("\n")[:-1]')
echo "Captured $(jq '. | length' <<<"$TREE_OUTPUT") files in tree structure."
echo "Captured $(jq '. | length' <<<"$README_FILES") README files."

# RUN PYTHON LLM + SHELL
# -------------------------------------
echo "Prompting model..."
# Run the interactive chat loop in Python.
#
# Fixes vs. the original:
#  * `$py3` was never defined, so the command expanded to nothing and the
#    heredoc was silently discarded — use python3 explicitly.
#  * The JSON was interpolated into the Python source ('$TREE_OUTPUT'),
#    so any quote in a filename broke the program. Data now travels via
#    environment variables, and the quoted 'EOF' delimiter disables all
#    shell expansion inside the heredoc.
TREE_OUTPUT="$TREE_OUTPUT" \
README_FILES="$README_FILES" \
MODEL="$MODEL" \
API_KEY="$API_KEY" \
python3 <<'EOF'
import json
import os
import sys

import openai

# Load variables exported by the wrapping shell script.
model = os.environ["MODEL"]
api_key = os.environ["API_KEY"]
file_structure = json.loads(os.environ["TREE_OUTPUT"])
readme_files = json.loads(os.environ["README_FILES"])


def read_readme_files(file_paths):
    """Return {path: contents} for each README; unreadable files get an
    [ERROR] placeholder instead of aborting the session."""
    readme_content = {}
    for file in file_paths:
        try:
            with open(file, "r", encoding="utf-8") as f:
                readme_content[file] = f.read()
        except Exception as e:
            readme_content[file] = f"[ERROR] Could not read file: {e}"
    return readme_content


readme_contents = read_readme_files(readme_files)

# System prompt: repo layout + README listing + full README contents.
prompt = f"""
You are an AI assistant trained to answer questions about an organization's information repository.
The file organization is as follows:
{json.dumps(file_structure, indent=2)}
The following README files provide key documentation:
{json.dumps(readme_files, indent=2)}
Here are the full contents of the README files:
{json.dumps(readme_contents, indent=2)}
Your task is to help users navigate this information repository and answer their questions accurately.
"""
print(prompt)

# One client for the whole session (the original rebuilt it on every query
# and also set the legacy module-level openai.api_key, which the new
# client-based API ignores).
client = openai.OpenAI(api_key=api_key)

chat_history = [
    {"role": "system", "content": prompt}  # System message stays constant
]


def query_llm(user_input):
    """Append user_input to the history, stream the model's reply to
    stdout, and record the reply in the history."""
    chat_history.append({"role": "user", "content": user_input})
    response = client.chat.completions.create(
        model=model,
        messages=chat_history,  # Full conversation history
        stream=True,            # Stream tokens as they arrive
    )
    print("\n[AI]: ", end="", flush=True)
    ai_response = ""
    for chunk in response:
        token = chunk.choices[0].delta.content
        if token:  # final chunk carries content=None
            print(token, end="", flush=True)
            ai_response += token
    print("\n")
    chat_history.append({"role": "assistant", "content": ai_response})


# Chat loop
print("\n=== README-CHAT ===")
print("Type your question about the information repository. Type 'exit' to quit.\n")
print("[ --- CHAT --- ]\n")

# Read from the terminal directly — stdin is the heredoc, not the user.
try:
    input_source = open('/dev/tty')
except OSError:
    print("Error: No terminal input available.")
    sys.exit(1)

while True:
    print(">> ", end="", flush=True)
    line = input_source.readline()
    if not line:
        # EOF: file.readline() returns '' — it never raises EOFError,
        # so the original except-clause was dead code and Ctrl-D spun
        # the loop forever.
        print("\n[INFO] EOF detected. Exiting chat session.")
        break
    user_input = line.strip()
    if user_input.lower() in ["exit", "quit"]:
        print("Goodbye!")
        break
    if user_input:  # skip blank lines instead of sending empty prompts
        query_llm(user_input)
EOF
echo "[END]"
# # # # # # # # # # # #
# Example Output:
# -----
# === README-CHAT ===
# Type your question about the information repository. Type 'exit' to quit.
#
# [ --- CHAT --- ]
#
# >> what tools should the data scientist use?
#
# [AI]: The Data Scientist is recommended to use the following tools and libraries:
#
# 1. **Python Libraries**
#    - `ibis`: A Python library for building SQL queries.
#    - `sqlalchemy` or `dataset`: These are Python libraries for database operations such as inserting and updating data.
#    - `fastapi`: This is used to serve requests from the web page.
#
# 2. **Database Visualization App**
#    - This could be either Beekeeper Studio or MySQL Workbench. These applications provide a graphical user interface to interact with databases.
#
# 3. `sshfs`: This is used to locally mount the lab drive to one's local machine.
#
# 4. A text editor or Integrated Development Environment (IDE) to write and test the scripts.
#
# 5. An SSH client to connect to the server and execute scripts.
#
# >>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.