@mw3i · Last active March 21, 2025
200-line bash script to talk to an LLM prompted with your repo structure + any READMEs it finds (leveraging python and openai)
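For example, assuming the script is saved as readme-chat.sh (a hypothetical filename) and OPENAI_API_KEY is exported in your shell, an invocation might look like:

    chmod +x readme-chat.sh
    ./readme-chat.sh ~/projects/my-repo "$OPENAI_API_KEY" gpt-4 3 8

All five positional arguments are optional; each falls back to the default set at the top of the script.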
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # # # # # #
# SETTINGS
# # # # # # # # # # # # # #
TARGET_DIR="${1:-./}" # Default target directory
API_KEY="${2:-$OPENAI_API_KEY}" # Default API key (falls back to the OPENAI_API_KEY env var)
MODEL="${3:-gpt-4}" # Default model
MAX_DEPTH="${4:-3}" # Default max depth for tree search
FILE_SIZE_LIMIT_MB="${5:-8}" # Default max size (MB) for README files included in the prompt
# # # # # # # # # # # # # # # # # # # # # # # # # #
# ABOUT
# # # # # # # # # # # # # #
show_help() {
    cat << EOF
Usage: $(basename "$0") [OPTIONS] [TARGET_DIR] [API_KEY] [MODEL] [MAX_DEPTH] [FILE_SIZE_LIMIT_MB]

A single bash script that runs an interactive shell with an LLM prompted on the README files
and file structure of a specified folder (default: ./).

Options:
  -h, --help          Show this help message and exit.

Arguments: (you can set defaults at the top of this script)
  TARGET_DIR          The directory to analyze (default: ./)
  API_KEY             OpenAI API key (default: reads from environment)
  MODEL               LLM model to use (default: gpt-4)
  MAX_DEPTH           Depth for scanning the directory (default: 3)
  FILE_SIZE_LIMIT_MB  Max README file size to include in the prompt (default: 8MB)

Example:
  $(basename "$0") /path/to/project \$OPENAI_API_KEY gpt-4 3 8
EOF
}

# Check for --help flag
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
    show_help
    exit 0
fi
# # # # # # # # # # # # # # # # # # # # # # # # # #
# PROGRAM
# # # # # # # # # # # # # #
echo "--- README-CHAT ---"
echo "[START]"
# CAPTURE TARGET DIRECTORY STRUCTURE
# -------------------------------------
echo "Saving target directory structure with max depth: $MAX_DEPTH"
TREE_OUTPUT=$(find "$TARGET_DIR" -mindepth 1 -maxdepth "$MAX_DEPTH" -type f -printf "%p\n" | jq -R -s -c 'split("\n")[:-1]') # <-- Capture directory structure with limited depth
README_FILES=$(find "$TARGET_DIR" -mindepth 1 -maxdepth "$MAX_DEPTH" -type f \( -iname "README.md" -o -iname "README.txt" -o -iname "README" \) -printf "%p\n" | jq -R -s -c 'split("\n")[:-1]') # <-- Capture README files
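# Note: `jq -R -s -c 'split("\n")[:-1]'` slurps the raw find output into a single string,
# splits it on newlines into a JSON array of paths, and drops the trailing empty element;
# the resulting compact arrays are interpolated into the Python heredoc below.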
echo "Captured $(echo "$TREE_OUTPUT" | jq '. | length') files in tree structure."
echo "Captured $(echo "$README_FILES" | jq '. | length') README files."
# RUN PYTHON LLM + SHELL
# -------------------------------------
echo "Prompting model..."
python3 <<EOF
import os, sys, json
import openai
# Load variables from Bash
model = "$MODEL"
api_key = "$API_KEY"
file_structure = json.loads('$TREE_OUTPUT')
readme_files = json.loads('$README_FILES')
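# Caveat (not handled here): the shell interpolates the JSON directly into these
# single-quoted literals, so paths containing single quotes or backslashes would
# break json.loads.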
max_depth = $MAX_DEPTH
file_size_limit = $FILE_SIZE_LIMIT_MB
# OpenAI client setup
openai.api_key = api_key
# Build optimized prompt for LLM
## Read and concatenate README file contents, skipping any over the size limit
def read_readme_files(file_paths):
    readme_content = {}
    size_limit_bytes = file_size_limit * 1024 * 1024  # enforce FILE_SIZE_LIMIT_MB
    for file in file_paths:
        try:
            if os.path.getsize(file) > size_limit_bytes:
                readme_content[file] = "[SKIPPED] File exceeds size limit"
                continue
            with open(file, "r", encoding="utf-8") as f:
                readme_content[file] = f.read()
        except Exception as e:
            readme_content[file] = f"[ERROR] Could not read file: {e}"
    return readme_content

## Fetch actual README file contents
readme_contents = read_readme_files(readme_files)
## Prompt
prompt = f"""
You are an AI assistant trained to answer questions about an organization's information repository.
The file organization is as follows:
{json.dumps(file_structure, indent=2)}
The following README files provide key documentation:
{json.dumps(readme_files, indent=2)}
Here are the full contents of the README files:
{json.dumps(readme_contents, indent=2)}
Your task is to help users navigate this information repository and answer their questions accurately.
"""
print(prompt)  # Echo the assembled system prompt (handy for sanity-checking what the model sees)
# Function to query OpenAI
chat_history = [  # <-- Initialize conversation history
    {"role": "system", "content": prompt}  # System message stays constant
]

def query_llm(user_input):
    client = openai.OpenAI(api_key=api_key)  # Initialize client
    # Append user message to chat history
    chat_history.append({"role": "user", "content": user_input})
    # Send request with history and streaming enabled
    response = client.chat.completions.create(
        model=model,
        messages=chat_history,  # Full conversation history
        stream=True,            # Enable streaming
    )
    print("\n[AI]: ", end="", flush=True)  # Show AI label without newline
    ai_response = ""  # Store response for history
    for chunk in response:  # Stream tokens as they arrive
        if chunk.choices[0].delta.content:  # Check if content exists
            token = chunk.choices[0].delta.content
            print(token, end="", flush=True)  # Print token immediately
            ai_response += token  # Store response
    print("\n")  # Newline after response is complete
    # Append AI response to chat history
    chat_history.append({"role": "assistant", "content": ai_response})
# Chat loop
print("\n=== README-CHAT ===")
print("Type your question about the information repository. Type 'exit' to quit.\n")
print("[ --- CHAT --- ]\n")
# Open a direct connection to the terminal (works even if stdin is detached)
try:
    input_source = open('/dev/tty')
except OSError:
    print("Error: No terminal input available.")
    sys.exit(1)

while True:
    print(">> ", end="", flush=True)  # Display input prompt
    line = input_source.readline()
    if not line:  # readline() returns '' at EOF (e.g. Ctrl-D) rather than raising EOFError
        print("\n[INFO] EOF detected. Exiting chat session.")
        break
    user_input = line.strip()
    if user_input.lower() in ["exit", "quit"]:
        print("Goodbye!")
        break
    query_llm(user_input)  # Stream AI response in real-time
EOF
echo "[END]"
# # # # # # # # # # # #
# Example Output:
# -----
# === README-CHAT ===
# Type your question about the information repository. Type 'exit' to quit.
#
# [ --- CHAT --- ]
#
# >> what tools should the data scientist use?
#
# [AI]: The Data Scientist is recommended to use the following tools and libraries:
#
# 1. **Python Libraries**
# - `ibis`: A Python library for building SQL queries.
# - `sqlalchemy` or `dataset`: These are Python libraries for database operations such as inserting and updating data.
# - `fastapi`: This is used to serve requests from the web page.
#
# 2. **Database Visualization App**
# - This could be either Beekeeper Studio or MySQL Workbench. These applications provide a graphical user interface to interact with databases.
#
# 3. `sshfs`: This is used to locally mount the lab drive to one's local machine.
#
# 4. A text editor or Integrated Development Environment (IDE) to write and test the scripts.
#
# 5. An SSH client to connect to the server and execute scripts.
#
# >>
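# # # # # # # # # # # #
# Dependencies (assumed): python3 with the `openai` package (v1+ client API), `jq`,
# and GNU find (for -printf). For example: pip install openai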