Last active
November 20, 2024 16:40
-
-
Save pahaz/17c66fd6d75b74ff307aca2b6bf942f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NOTE: based on python version: https://gist.github.com/pahaz/17c66fd6d75b74ff307aca2b6bf942f3 | |
const crypto = require('crypto')
const fs = require('fs')
const os = require('os')
const path = require('path')
const fetch = require('node-fetch') // Import fetch from node-fetch
const DEFAULT_OUTPUT_DIR = 'out'
// Use os.homedir() instead of process.env.HOME: HOME is unset on Windows,
// which would make path.resolve(undefined, ...) throw at module load time.
const GPT_KEY_FILE = path.resolve(os.homedir(), '.openai.key')
const GPT_CACHE_DIR = path.resolve(os.homedir(), '.openai.cache')
// Flatten a query/answer to a single-spaced, at-most-50-char preview line.
function cleanAndTrimText (text) {
  const singleLine = text.replace(/[\n\r\t\f\v]/g, ' ')
  const collapsed = singleLine.split(/\s+/).join(' ')
  return collapsed.slice(0, 50).trim()
}
// Resolve the OpenAI API key: the key file wins, then the environment variable.
function loadApiKey () {
  if (fs.existsSync(GPT_KEY_FILE)) {
    return fs.readFileSync(GPT_KEY_FILE, 'utf-8').trim()
  }
  const envKey = process.env.OPENAI_API_KEY
  if (envKey) {
    return envKey.trim()
  }
  throw new Error('API key file not found and OPENAI_API_KEY environment variable is not set.')
}
// Stable cache key: hex-encoded SHA-256 of the query text.
function calculateQueryHash (query) {
  const hasher = crypto.createHash('sha256')
  hasher.update(query, 'utf8')
  return hasher.digest('hex')
}
// True when both the query and answer files for this hash are on disk.
// Side effect: creates the cache directory on first use so later writes succeed.
function isQueryCached (queryHash) {
  if (!fs.existsSync(GPT_CACHE_DIR)) {
    fs.mkdirSync(GPT_CACHE_DIR, { recursive: true })
  }
  const base = path.join(GPT_CACHE_DIR, queryHash)
  return fs.existsSync(`${base}.query.txt`) && fs.existsSync(`${base}.answer.txt`)
}
// Read a cached answer; callers are expected to have checked isQueryCached() first.
function loadCachedAnswer (queryHash) {
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  const cachedText = fs.readFileSync(answerFile, 'utf-8')
  return cachedText
}
// Persist a query/answer pair under the cache directory.
// Creates the directory itself so it no longer silently depends on
// isQueryCached() having been called first to mkdir the cache.
function cacheQueryAndAnswer (query, answer, queryHash) {
  fs.mkdirSync(GPT_CACHE_DIR, { recursive: true }) // no-op if it already exists
  const queryFile = path.join(GPT_CACHE_DIR, `${queryHash}.query.txt`)
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  fs.writeFileSync(queryFile, query, 'utf-8')
  fs.writeFileSync(answerFile, answer, 'utf-8')
}
// Send a single-message chat completion request and return the reply text.
// Throws with the HTTP status AND response body on failure (the Python twin
// already reported the body; statusText alone hides the actual API error),
// and validates the response shape instead of crashing on a missing field.
async function getOpenAiResponse (query) {
  const apiKey = loadApiKey()
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: query }],
    }),
  })
  if (!response.ok) {
    // Best-effort body read: the error detail lives in the JSON payload.
    const errorBody = await response.text().catch(() => '')
    throw new Error(`OpenAI API returned an error: ${response.status} ${response.statusText} ${errorBody}`)
  }
  const data = await response.json()
  const content = data?.choices?.[0]?.message?.content
  if (typeof content !== 'string') {
    throw new Error('OpenAI API response is missing choices[0].message.content')
  }
  return content.trim()
}
// Process every '----'-separated query from inputFile, answering from the
// cache when possible, and write each answer to outputDir/result_<n>.md.
// <n> is the 1-based segment position — empty segments keep their slot, so
// file names stay stable when a query is blanked out of the input.
async function main (inputFile, outputDir) {
  if (!fs.existsSync(inputFile)) {
    console.error(`Input file not found: ${inputFile}`)
    process.exit(1)
  }
  const segments = fs.readFileSync(inputFile, 'utf-8').split('----')
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true })
  }
  for (const [index, segment] of segments.entries()) {
    const query = segment.trim()
    if (!query) continue
    const label = index + 1
    const queryHash = calculateQueryHash(query)
    let answer
    if (isQueryCached(queryHash)) {
      console.log(`Q${label} (cached): ${cleanAndTrimText(query)}`)
      answer = loadCachedAnswer(queryHash)
    } else {
      console.log(`Q${label} (api): ${cleanAndTrimText(query)}`)
      answer = await getOpenAiResponse(query)
      cacheQueryAndAnswer(query, answer, queryHash)
    }
    console.log(`A${label}: ${cleanAndTrimText(answer)}`)
    const outputFile = path.join(outputDir, `result_${label}.md`)
    fs.writeFileSync(outputFile, answer, 'utf-8')
    console.log(`Result saved to ${outputFile}`)
  }
}
// CLI entry point: node script.js <input_file> [--out <output_directory>]
if (require.main === module) {
  const args = process.argv.slice(2)
  if (args.length < 1 || args.length > 3) {
    console.error('Usage: node script.js <input_file> [--out <output_directory>]')
    process.exit(1)
  }
  const inputFile = args[0]
  let outputDir = DEFAULT_OUTPUT_DIR
  // Optional --out flag; argv entries are always strings, so an undefined
  // successor means the flag was the last argument.
  const outIndex = args.indexOf('--out')
  if (outIndex !== -1) {
    const requestedDir = args[outIndex + 1]
    if (requestedDir === undefined) {
      console.error('Error: --out option requires a directory argument')
      process.exit(1)
    }
    outputDir = requestedDir
  }
  main(inputFile, outputDir).catch((err) => {
    console.error('Error:', err)
    process.exit(1)
  })
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: based on python version: https://gist.github.com/pahaz/17c66fd6d75b74ff307aca2b6bf942f3 | |
import sys | |
import os | |
import hashlib | |
import requests | |
import re | |
import argparse | |
# Directory used for results when --out is not given on the command line.
DEFAULT_OUTPUT_DIR = 'out'
# Tilde paths are expanded with os.path.expanduser at the point of use.
API_KEY_FILE = '~/.openai.key'
CACHE_DIR = '~/.openai.cache'
def clean_and_trim_text(text):
    """Collapse all whitespace to single spaces and cap the preview at 50 chars."""
    single_line = re.sub(r'[\n\r\t\f\v]', ' ', text)
    collapsed = ' '.join(single_line.split())
    return collapsed[:50].strip()
def load_api_key():
    """Return the OpenAI API key, preferring the environment over the key file.

    Raises FileNotFoundError when neither source provides a key.
    """
    env_key = os.getenv('OPENAI_API_KEY')
    if env_key:
        return env_key
    key_path = os.path.expanduser(API_KEY_FILE)
    if os.path.exists(key_path):
        with open(key_path, 'r') as key_file:
            return key_file.read().strip()
    raise FileNotFoundError(
        f"API key not found. Please set the 'OPENAI_API_KEY' environment variable "
        f"or create a file at: {key_path}"
    )
def calculate_query_hash(query):
    """Return the hex SHA-256 digest of the query text (stable cache key)."""
    digest = hashlib.sha256()
    digest.update(query.encode('utf-8'))
    return digest.hexdigest()
def is_query_cached(query_hash):
    """Return True when both the query and answer files exist in the cache.

    Side effect: creates the cache directory on first use so later writes
    succeed.
    """
    cache_dir = os.path.expanduser(CACHE_DIR)
    # exist_ok=True removes the check-then-create race the previous
    # `if not os.path.exists(...): os.makedirs(...)` pattern had when two
    # runs start concurrently.
    os.makedirs(cache_dir, exist_ok=True)
    query_file = os.path.join(cache_dir, f'{query_hash}.query.txt')
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    return os.path.exists(query_file) and os.path.exists(answer_file)
def load_cached_answer(query_hash):
    """Return the cached answer text for query_hash.

    Callers must ensure the entry exists (see is_query_cached).
    """
    cache_dir = os.path.expanduser(CACHE_DIR)
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    # Explicit UTF-8: the default locale encoding is platform-dependent and
    # can mis-decode answers written on another machine.
    with open(answer_file, 'r', encoding='utf-8') as f:
        return f.read()
def cache_query_and_answer(query, answer, query_hash):
    """Persist the query/answer pair under the cache directory."""
    cache_dir = os.path.expanduser(CACHE_DIR)
    # Create the directory here too, so the function no longer silently
    # depends on is_query_cached() having been called first.
    os.makedirs(cache_dir, exist_ok=True)
    query_file = os.path.join(cache_dir, f'{query_hash}.query.txt')
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    # Explicit UTF-8 keeps the cache portable across locales.
    with open(query_file, 'w', encoding='utf-8') as qf:
        qf.write(query)
    with open(answer_file, 'w', encoding='utf-8') as af:
        af.write(answer)
def get_openai_response(query):
    """Send a single-message chat completion request and return the reply text.

    Raises Exception with the HTTP status and response body on any non-200
    answer from the API.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {load_api_key()}"
    }
    payload = {
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": query}
        ],
        "temperature": 0.7
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
    )
    if response.status_code != 200:
        raise Exception(f"OpenAI API request failed with status code {response.status_code}: {response.text}")
    body = response.json()
    return body['choices'][0]['message']['content'].strip()
# Main function | |
def main(input_file, output_dir):
    """Answer every '----'-separated query in input_file, caching responses.

    Each answer is written to output_dir/result_<n>.md, where <n> is the
    1-based position of the query segment (empty segments keep their slot,
    so file names stay stable when a query is blanked out).
    """
    # Explicit UTF-8 on every open(): the default locale encoding is
    # platform-dependent and would corrupt non-ASCII queries/answers.
    with open(input_file, 'r', encoding='utf-8') as f:
        queries = f.read().split('----')
    # exist_ok avoids the check-then-create race of the previous pattern.
    os.makedirs(output_dir, exist_ok=True)
    for i, query in enumerate(queries):
        query = query.strip()
        if not query:
            continue
        query_hash = calculate_query_hash(query)
        if is_query_cached(query_hash):
            print(f"Q{i+1} (cached): {clean_and_trim_text(query)}")
            answer = load_cached_answer(query_hash)
        else:
            print(f"Q{i+1} (api): {clean_and_trim_text(query)}")
            answer = get_openai_response(query)
            cache_query_and_answer(query, answer, query_hash)
        print(f"A{i+1}: {clean_and_trim_text(answer)}")
        output_file = os.path.join(output_dir, f'result_{i+1}.md')
        with open(output_file, 'w', encoding='utf-8') as out:
            out.write(answer)
        print(f"Result saved to {output_file}")
# CLI entry point: python script.py <input_file> [--out <output_directory>]
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process input file and save OpenAI API responses.")
    parser.add_argument("input_file", help="The input file containing queries")
    parser.add_argument("--out", dest="output_dir", default=DEFAULT_OUTPUT_DIR, help="Output directory to save results (default: 'out')")
    args = parser.parse_args()
    if not os.path.exists(args.input_file):
        print(f"Input file not found: {args.input_file}")
        sys.exit(1)
    main(args.input_file, args.output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.