// NOTE: based on python version: https://gist.github.com/pahaz/17c66fd6d75b74ff307aca2b6bf942f3
const crypto = require('crypto')
const fs = require('fs')
const os = require('os')
const path = require('path')
const fetch = require('node-fetch') // node-fetch for Node < 18; Node 18+ ships a global fetch

const DEFAULT_OUTPUT_DIR = 'out'
// os.homedir() works on every platform; process.env.HOME is unset on Windows
const GPT_KEY_FILE = path.resolve(os.homedir(), '.openai.key')
const GPT_CACHE_DIR = path.resolve(os.homedir(), '.openai.cache')
function cleanAndTrimText (text) {
  return text.replace(/[\n\r\t\f\v]/g, ' ').split(/\s+/).join(' ').slice(0, 50).trim()
}

function loadApiKey () {
  if (fs.existsSync(GPT_KEY_FILE)) {
    return fs.readFileSync(GPT_KEY_FILE, 'utf-8').trim()
  } else if (process.env.OPENAI_API_KEY) {
    return process.env.OPENAI_API_KEY.trim()
  } else {
    throw new Error('API key file not found and OPENAI_API_KEY environment variable is not set.')
  }
}

function calculateQueryHash (query) {
  return crypto.createHash('sha256').update(query, 'utf8').digest('hex')
}

function isQueryCached (queryHash) {
  if (!fs.existsSync(GPT_CACHE_DIR)) {
    fs.mkdirSync(GPT_CACHE_DIR, { recursive: true })
  }
  const queryFile = path.join(GPT_CACHE_DIR, `${queryHash}.query.txt`)
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  return fs.existsSync(queryFile) && fs.existsSync(answerFile)
}

function loadCachedAnswer (queryHash) {
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  return fs.readFileSync(answerFile, 'utf-8')
}

function cacheQueryAndAnswer (query, answer, queryHash) {
  const queryFile = path.join(GPT_CACHE_DIR, `${queryHash}.query.txt`)
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  fs.writeFileSync(queryFile, query, 'utf-8')
  fs.writeFileSync(answerFile, answer, 'utf-8')
}
async function getOpenAiResponse (query) {
  const apiKey = loadApiKey()
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: query }],
    }),
  })
  if (!response.ok) {
    throw new Error(`OpenAI API returned an error: ${response.statusText}`)
  }
  const data = await response.json()
  return data.choices[0].message.content.trim()
}
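// For reference, the slice of the chat completions response that the return
// statement above reads (all other fields omitted):
//
//   { "choices": [{ "message": { "role": "assistant", "content": "..." } }] }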
async function main (inputFile, outputDir) {
  if (!fs.existsSync(inputFile)) {
    console.error(`Input file not found: ${inputFile}`)
    process.exit(1)
  }
  const queries = fs.readFileSync(inputFile, 'utf-8').split('----')
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true })
  }
  for (let i = 0; i < queries.length; i++) {
    const query = queries[i].trim()
    if (!query) continue
    const queryHash = calculateQueryHash(query)
    let answer
    if (isQueryCached(queryHash)) {
      console.log(`Q${i + 1} (cached): ${cleanAndTrimText(query)}`)
      answer = loadCachedAnswer(queryHash)
    } else {
      console.log(`Q${i + 1} (api): ${cleanAndTrimText(query)}`)
      answer = await getOpenAiResponse(query)
      cacheQueryAndAnswer(query, answer, queryHash)
    }
    console.log(`A${i + 1}: ${cleanAndTrimText(answer)}`)
    const outputFile = path.join(outputDir, `result_${i + 1}.md`)
    fs.writeFileSync(outputFile, answer, 'utf-8')
    console.log(`Result saved to ${outputFile}`)
  }
}
if (require.main === module) {
  const args = process.argv.slice(2)
  if (args.length < 1 || args.length > 3) {
    console.error('Usage: node script.js <input_file> [--out <output_directory>]')
    process.exit(1)
  }
  const inputFile = args[0]
  let outputDir = DEFAULT_OUTPUT_DIR
  // Parse optional arguments
  const outIndex = args.indexOf('--out')
  if (outIndex !== -1) {
    if (args.length > outIndex + 1) {
      outputDir = args[outIndex + 1]
    } else {
      console.error('Error: --out option requires a directory argument')
      process.exit(1)
    }
  }
  main(inputFile, outputDir).catch((err) => {
    console.error('Error:', err)
    process.exit(1)
  })
}
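// Example run (a sketch; 'queries.txt' and the saved script name 'script.js'
// are hypothetical). The input file holds plain-text queries separated by a
// '----' line:
//
//   $ cat queries.txt
//   Explain HTTP caching in one paragraph.
//   ----
//   Summarize the CAP theorem in two sentences.
//
//   $ node script.js queries.txt --out out
//   Q1 (api): Explain HTTP caching in one paragraph.
//   A1: ...
//   Result saved to out/result_1.md
//   Q2 (api): Summarize the CAP theorem in two sentences.
//   A2: ...
//   Result saved to out/result_2.md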
# NOTE: python version; the JS port above is based on this file: https://gist.github.com/pahaz/17c66fd6d75b74ff307aca2b6bf942f3
import sys
import os
import hashlib
import re
import argparse

import requests

DEFAULT_OUTPUT_DIR = 'out'
API_KEY_FILE = '~/.openai.key'
CACHE_DIR = '~/.openai.cache'
def clean_and_trim_text(text):
    cleaned_text = re.sub(r'[\n\r\t\f\v]', ' ', text)
    cleaned_text = ' '.join(cleaned_text.split())
    return cleaned_text[:50].strip()


def load_api_key():
    # First, try to get the API key from environment variable
    api_key = os.getenv('OPENAI_API_KEY')
    if api_key:
        return api_key
    # If not found, look for the key in the file
    key_path = os.path.expanduser(API_KEY_FILE)
    if not os.path.exists(key_path):
        raise FileNotFoundError(
            f"API key not found. Please set the 'OPENAI_API_KEY' environment variable "
            f"or create a file at: {key_path}"
        )
    with open(key_path, 'r') as f:
        return f.read().strip()


def calculate_query_hash(query):
    return hashlib.sha256(query.encode('utf-8')).hexdigest()


def is_query_cached(query_hash):
    cache_dir = os.path.expanduser(CACHE_DIR)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    query_file = os.path.join(cache_dir, f'{query_hash}.query.txt')
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    return os.path.exists(query_file) and os.path.exists(answer_file)


def load_cached_answer(query_hash):
    cache_dir = os.path.expanduser(CACHE_DIR)
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    with open(answer_file, 'r') as f:
        return f.read()


def cache_query_and_answer(query, answer, query_hash):
    cache_dir = os.path.expanduser(CACHE_DIR)
    query_file = os.path.join(cache_dir, f'{query_hash}.query.txt')
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    with open(query_file, 'w') as qf:
        qf.write(query)
    with open(answer_file, 'w') as af:
        af.write(answer)
def get_openai_response(query):
    api_key = load_api_key()
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": query}
        ],
        "temperature": 0.7
    }
    response = requests.post(url, headers=headers, json=payload)
    if response.status_code == 200:
        response_json = response.json()
        return response_json['choices'][0]['message']['content'].strip()
    else:
        raise Exception(f"OpenAI API request failed with status code {response.status_code}: {response.text}")
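# The same request as a curl sketch, handy for debugging outside the script
# (assumes OPENAI_API_KEY is exported in the shell):
#
#   curl https://api.openai.com/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "Authorization: Bearer $OPENAI_API_KEY" \
#     -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "hi"}], "temperature": 0.7}'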
# Main function
def main(input_file, output_dir):
    # Read input file and split by delimiter
    with open(input_file, 'r') as f:
        queries = f.read().split('----')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for i, query in enumerate(queries):
        query = query.strip()
        if not query:
            continue
        # Calculate hash of the query
        query_hash = calculate_query_hash(query)
        # Check if the query has been cached
        if is_query_cached(query_hash):
            print(f"Q{i+1} (cached): {clean_and_trim_text(query)}")
            answer = load_cached_answer(query_hash)
        else:
            print(f"Q{i+1} (api): {clean_and_trim_text(query)}")
            answer = get_openai_response(query)
            cache_query_and_answer(query, answer, query_hash)
        print(f"A{i+1}: {clean_and_trim_text(answer)}")
        # Write result to output file
        output_file = os.path.join(output_dir, f'result_{i+1}.md')
        with open(output_file, 'w') as f:
            f.write(answer)
        print(f"Result saved to {output_file}")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process input file and save OpenAI API responses.")
    parser.add_argument("input_file", help="The input file containing queries")
    parser.add_argument("--out", dest="output_dir", default=DEFAULT_OUTPUT_DIR, help="Output directory to save results (default: 'out')")
    args = parser.parse_args()
    input_file = args.input_file
    output_dir = args.output_dir
    if not os.path.exists(input_file):
        print(f"Input file not found: {input_file}")
        sys.exit(1)
    main(input_file, output_dir)
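# Cache layout sketch: after a run, each query/answer pair lives under
# ~/.openai.cache, keyed by the SHA-256 hex digest of the query text
# (digests shortened here for illustration):
#
#   ~/.openai.cache/3f2a...9c1e.query.txt    # the raw query text
#   ~/.openai.cache/3f2a...9c1e.answer.txt   # the model's answer
#
# Re-running with the same input file makes no API calls for unchanged
# queries; editing a query changes its hash and triggers a fresh request.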