// NOTE: based on python version: https://gist.github.com/pahaz/17c66fd6d75b74ff307aca2b6bf942f3
const crypto = require('crypto')
const fs = require('fs')
const os = require('os')
const path = require('path')
const fetch = require('node-fetch') // node-fetch for Node < 18; Node 18+ ships a global fetch

const DEFAULT_OUTPUT_DIR = 'out'
// os.homedir() works on every platform; process.env.HOME is unset on Windows
const GPT_KEY_FILE = path.resolve(os.homedir(), '.openai.key')
const GPT_CACHE_DIR = path.resolve(os.homedir(), '.openai.cache')
function cleanAndTrimText (text) {
  return text.replace(/[\n\r\t\f\v]/g, ' ').split(/\s+/).join(' ').slice(0, 50).trim()
}

function loadApiKey () {
  if (fs.existsSync(GPT_KEY_FILE)) {
    return fs.readFileSync(GPT_KEY_FILE, 'utf-8').trim()
  } else if (process.env.OPENAI_API_KEY) {
    return process.env.OPENAI_API_KEY.trim()
  } else {
    throw new Error('API key file not found and OPENAI_API_KEY environment variable is not set.')
  }
}

function calculateQueryHash (query) {
  return crypto.createHash('sha256').update(query, 'utf8').digest('hex')
}

function isQueryCached (queryHash) {
  if (!fs.existsSync(GPT_CACHE_DIR)) {
    fs.mkdirSync(GPT_CACHE_DIR, { recursive: true })
  }
  const queryFile = path.join(GPT_CACHE_DIR, `${queryHash}.query.txt`)
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  return fs.existsSync(queryFile) && fs.existsSync(answerFile)
}

function loadCachedAnswer (queryHash) {
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  return fs.readFileSync(answerFile, 'utf-8')
}

function cacheQueryAndAnswer (query, answer, queryHash) {
  const queryFile = path.join(GPT_CACHE_DIR, `${queryHash}.query.txt`)
  const answerFile = path.join(GPT_CACHE_DIR, `${queryHash}.answer.txt`)
  fs.writeFileSync(queryFile, query, 'utf-8')
  fs.writeFileSync(answerFile, answer, 'utf-8')
}
async function getOpenAiResponse (query) {
  const apiKey = loadApiKey()
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: query }],
    }),
  })
  if (!response.ok) {
    throw new Error(`OpenAI API returned an error: ${response.statusText}`)
  }
  const data = await response.json()
  return data.choices[0].message.content.trim()
}
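// For reference, the slice of the chat completions response that the return
// statement above reads (all other fields omitted):
//
//   { "choices": [{ "message": { "role": "assistant", "content": "..." } }] }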
async function main (inputFile, outputDir) {
  if (!fs.existsSync(inputFile)) {
    console.error(`Input file not found: ${inputFile}`)
    process.exit(1)
  }
  const queries = fs.readFileSync(inputFile, 'utf-8').split('----')
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true })
  }
  for (let i = 0; i < queries.length; i++) {
    const query = queries[i].trim()
    if (!query) continue
    const queryHash = calculateQueryHash(query)
    let answer
    if (isQueryCached(queryHash)) {
      console.log(`Q${i + 1} (cached): ${cleanAndTrimText(query)}`)
      answer = loadCachedAnswer(queryHash)
    } else {
      console.log(`Q${i + 1} (api): ${cleanAndTrimText(query)}`)
      answer = await getOpenAiResponse(query)
      cacheQueryAndAnswer(query, answer, queryHash)
    }
    console.log(`A${i + 1}: ${cleanAndTrimText(answer)}`)
    const outputFile = path.join(outputDir, `result_${i + 1}.md`)
    fs.writeFileSync(outputFile, answer, 'utf-8')
    console.log(`Result saved to ${outputFile}`)
  }
}
if (require.main === module) {
  const args = process.argv.slice(2)
  if (args.length < 1 || args.length > 3) {
    console.error('Usage: node script.js <input_file> [--out <output_directory>]')
    process.exit(1)
  }
  const inputFile = args[0]
  let outputDir = DEFAULT_OUTPUT_DIR
  // Parse optional arguments
  const outIndex = args.indexOf('--out')
  if (outIndex !== -1) {
    if (args.length > outIndex + 1) {
      outputDir = args[outIndex + 1]
    } else {
      console.error('Error: --out option requires a directory argument')
      process.exit(1)
    }
  }
  main(inputFile, outputDir).catch((err) => {
    console.error('Error:', err)
    process.exit(1)
  })
}
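// Example run (a sketch; 'queries.txt' and the saved script name 'script.js'
// are hypothetical). The input file holds plain-text queries separated by a
// '----' line:
//
//   $ cat queries.txt
//   Explain HTTP caching in one paragraph.
//   ----
//   Summarize the CAP theorem in two sentences.
//
//   $ node script.js queries.txt --out out
//   Q1 (api): Explain HTTP caching in one paragraph.
//   A1: ...
//   Result saved to out/result_1.md
//   Q2 (api): Summarize the CAP theorem in two sentences.
//   A2: ...
//   Result saved to out/result_2.md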
# NOTE: python version; the JS port above is based on this file: https://gist.github.com/pahaz/17c66fd6d75b74ff307aca2b6bf942f3
import sys
import os
import hashlib
import re
import argparse

import requests

DEFAULT_OUTPUT_DIR = 'out'
API_KEY_FILE = '~/.openai.key'
CACHE_DIR = '~/.openai.cache'
def clean_and_trim_text(text):
    cleaned_text = re.sub(r'[\n\r\t\f\v]', ' ', text)
    cleaned_text = ' '.join(cleaned_text.split())
    return cleaned_text[:50].strip()


def load_api_key():
    # First, try to get the API key from environment variable
    api_key = os.getenv('OPENAI_API_KEY')
    if api_key:
        return api_key
    # If not found, look for the key in the file
    key_path = os.path.expanduser(API_KEY_FILE)
    if not os.path.exists(key_path):
        raise FileNotFoundError(
            f"API key not found. Please set the 'OPENAI_API_KEY' environment variable "
            f"or create a file at: {key_path}"
        )
    with open(key_path, 'r') as f:
        return f.read().strip()


def calculate_query_hash(query):
    return hashlib.sha256(query.encode('utf-8')).hexdigest()


def is_query_cached(query_hash):
    cache_dir = os.path.expanduser(CACHE_DIR)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    query_file = os.path.join(cache_dir, f'{query_hash}.query.txt')
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    return os.path.exists(query_file) and os.path.exists(answer_file)


def load_cached_answer(query_hash):
    cache_dir = os.path.expanduser(CACHE_DIR)
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    with open(answer_file, 'r') as f:
        return f.read()


def cache_query_and_answer(query, answer, query_hash):
    cache_dir = os.path.expanduser(CACHE_DIR)
    query_file = os.path.join(cache_dir, f'{query_hash}.query.txt')
    answer_file = os.path.join(cache_dir, f'{query_hash}.answer.txt')
    with open(query_file, 'w') as qf:
        qf.write(query)
    with open(answer_file, 'w') as af:
        af.write(answer)
def get_openai_response(query):
    api_key = load_api_key()
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4",
        "messages": [
            {"role": "user", "content": query}
        ],
        "temperature": 0.7
    }
    response = requests.post(url, headers=headers, json=payload)
    if response.status_code == 200:
        response_json = response.json()
        return response_json['choices'][0]['message']['content'].strip()
    else:
        raise Exception(f"OpenAI API request failed with status code {response.status_code}: {response.text}")
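# The same request as a curl sketch, handy for debugging outside the script
# (assumes OPENAI_API_KEY is exported in the shell):
#
#   curl https://api.openai.com/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "Authorization: Bearer $OPENAI_API_KEY" \
#     -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "hi"}], "temperature": 0.7}'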
# Main function
def main(input_file, output_dir):
    # Read input file and split by delimiter
    with open(input_file, 'r') as f:
        queries = f.read().split('----')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for i, query in enumerate(queries):
        query = query.strip()
        if not query:
            continue
        # Calculate hash of the query
        query_hash = calculate_query_hash(query)
        # Check if the query has been cached
        if is_query_cached(query_hash):
            print(f"Q{i+1} (cached): {clean_and_trim_text(query)}")
            answer = load_cached_answer(query_hash)
        else:
            print(f"Q{i+1} (api): {clean_and_trim_text(query)}")
            answer = get_openai_response(query)
            cache_query_and_answer(query, answer, query_hash)
        print(f"A{i+1}: {clean_and_trim_text(answer)}")
        # Write result to output file
        output_file = os.path.join(output_dir, f'result_{i+1}.md')
        with open(output_file, 'w') as f:
            f.write(answer)
        print(f"Result saved to {output_file}")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Process input file and save OpenAI API responses.")
    parser.add_argument("input_file", help="The input file containing queries")
    parser.add_argument("--out", dest="output_dir", default=DEFAULT_OUTPUT_DIR, help="Output directory to save results (default: 'out')")
    args = parser.parse_args()
    input_file = args.input_file
    output_dir = args.output_dir
    if not os.path.exists(input_file):
        print(f"Input file not found: {input_file}")
        sys.exit(1)
    main(input_file, output_dir)
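# Cache layout sketch: after a run, each query/answer pair lives under
# ~/.openai.cache, keyed by the SHA-256 hex digest of the query text
# (digests shortened here for illustration):
#
#   ~/.openai.cache/3f2a...9c1e.query.txt    # the raw query text
#   ~/.openai.cache/3f2a...9c1e.answer.txt   # the model's answer
#
# Re-running with the same input file makes no API calls for unchanged
# queries; editing a query changes its hash and triggers a fresh request.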