OpenAI utils
from openai import OpenAI

client = OpenAI(api_key='sk-proj-XXX')

def gpt_request(prompt, model="gpt-4o-mini", max_tokens=100):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

prompt = "What is the capital of France?"
result = gpt_request(prompt)
import os

def get_ada_embedding(text):
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.embeddings.create(input=[text], model="text-embedding-3-small")
    return response.data[0].embedding
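# Hypothetical usage sketch (not part of the original gist): compare two texts by
# the cosine similarity of their embeddings. Assumes OPENAI_API_KEY is set.
import math

def cosine_similarity(a, b):
    # Plain-Python cosine similarity; embeddings are lists of floats
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b)))

# e1 = get_ada_embedding("Paris is the capital of France")
# e2 = get_ada_embedding("The French capital is Paris")
# print(cosine_similarity(e1, e2))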
import os
import logging
from deepgram import PrerecordedOptions, FileSource

logger = logging.getLogger(__name__)

async def transcribe_audio(self, file_path: str, user_key: str) -> str:
    """Transcribe audio file using Deepgram API"""
    logger.info(f"Transcribing audio file: {file_path}")
    # Check if file exists
    if not os.path.exists(file_path):
        logger.error(f"File not found: {file_path}")
        return f"Transcription failed: File not found: {file_path}"
    # Get file size
    file_size = os.path.getsize(file_path)
    if file_size == 0:
        logger.error(f"File is empty: {file_path}")
        return f"Transcription failed: File is empty: {file_path}"
    # Configure transcription options
    options = PrerecordedOptions(model="nova-2", smart_format=True, language="fr")
    # Read the file as binary data
    with open(file_path, "rb") as audio:
        buffer_data = audio.read()
    payload: FileSource = {
        "buffer": buffer_data,
    }
    # Ensure we're passing binary data to the API
    response = self.deepgram.listen.prerecorded.v("1").transcribe_file(
        payload, options
    )
    # Extract transcript from response
    transcript = response.results.channels[0].alternatives[0].transcript
    # Delete the audio file after transcription
    try:
        os.remove(file_path)
        logger.info(f"Deleted audio file: {file_path}")
    except Exception as e:
        logger.error(f"Failed to delete audio file {file_path}: {str(e)}")
    logger.info(f"Transcription completed for user: {user_key}")
    logger.info(f"Transcript: {transcript}")
    return transcript
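# Wiring sketch (an assumption, not from the original gist): transcribe_audio is
# written as a method, so `self.deepgram` must be a configured client. With
# deepgram-sdk v3 that looks roughly like the class below; exact construction may
# differ between SDK versions.
from deepgram import DeepgramClient

class TranscriptionService:
    def __init__(self, api_key: str):
        self.deepgram = DeepgramClient(api_key)

# TranscriptionService.transcribe_audio = transcribe_audio  # attach the method above
# service = TranscriptionService(os.getenv("DEEPGRAM_API_KEY"))
# transcript = await service.transcribe_audio("note.mp3", user_key="user-1")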
import langfun as lf
import pyglove as pg

class Structure(pg.Object):
    short_summary: str
    title: str
    keywords: list[str]
    why_interesting: str

def PROMPT(case_details):
    return lf.query_prompt(
        f'''Do what you need to do.
Case details: {case_details}
Format your response strictly according to the Structure class, without any additional explanation.''',
        schema=Structure,
        lm=lf.llms.Gpt4o(api_key='sk-proj-XXX'),
    ).text

def messages(t):
    return [
        {"role": "system", "content": "You are a XXX"},
        {"role": "user", "content": PROMPT(t)}
    ]

# Relies on generate_jsonl_data (defined below) and an external
# pdf_contents_analyzed mapping of {id: {"content": ...}}
jsonl_data = [generate_jsonl_data(
    k, model, max_tokens, messages(v['content'])) for k, v in pdf_contents_analyzed.items()]
import langfun as lf
import pyglove as pg

class AccidentVoitureDetails(pg.Object):
    nom_personne_accidentee: str
    prenom_personne_accidentee: str

def get_accident_voiture_details(file_text):
    r = lf.query(prompt=file_text, schema=AccidentVoitureDetails, lm=lf.llms.Gpt4o())
    return r
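# Usage sketch (hypothetical input; assumes an OpenAI key is configured for langfun):
# details = get_accident_voiture_details(
#     "Le 3 mars, M. Jean Dupont a eu un accident de voiture boulevard Zerktouni.")
# print(details.nom_personne_accidentee, details.prenom_personne_accidentee)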
from openai import OpenAI
from dotenv import load_dotenv
import os
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
PROMPT_GENERIC = """
You are an expert analyst specializing in data classification. Your task is to analyze a given input and determine if it matches specific criteria based on the provided guidelines.
The inputs are often short, abbreviated, and may not explicitly contain certain keywords. You must look for specific patterns and indicators.
**Analysis and Response Instructions:**
* Analyze the user-provided input.
* If you find a clear or potential match, set `is_match` to `True`. Identify the `keyword`, select the most appropriate `category`, and provide a `confidence` score.
* Use high confidence (0.8-1.0) for explicit terms that strongly indicate a match.
* Use medium confidence (0.4-0.7) for terms that likely indicate a match but aren't certain.
* Use low confidence (0.1-0.3) for ambiguous terms that might indicate a match.
* If the input does not appear to match the criteria, set `is_match` to `False`, set `confidence` to 0.0, and briefly state the reason in the `explanation`.
**Example 1:**
* **Input:** `EXAMPLE INPUT 1`
* **Output:** `{"is_match": true, "category": "Category A", "keyword": "EXAMPLE", "confidence": 1.0, "explanation": "Clear match for Category A."}`
**Example 2:**
* **Input:** `EXAMPLE INPUT 2`
* **Output:** `{"is_match": false, "category": null, "keyword": null, "confidence": 0.0, "explanation": "No matching criteria found."}`
**Example 3:**
* **Input:** `EXAMPLE INPUT 3`
* **Output:** `{"is_match": true, "category": "Category B", "keyword": "INPUT 3", "confidence": 0.95, "explanation": "Strong indicator of Category B."}`
"""
# Define the JSON schema for structured output
json_schema_generic = {
    "name": "pattern_analysis",
    "schema": {
        "type": "object",
        "properties": {
            "is_match": {
                "type": "boolean",
                "description": "True if the input matches the specified criteria"
            },
            "category": {
                "type": ["string", "null"],
                "enum": [
                    "Category A", "Category B", "Category C",
                    "Category D", "Category E", "Category F",
                    "Category G", "Category H", "Category I",
                    "Category J", "Category K", "Category L",
                    "Category M", "Category N", "Other"
                ],
                "description": "Category of match if detected"
            },
            "keyword": {
                "type": ["string", "null"],
                "description": "Specific keyword or phrase that suggests a match"
            },
            "confidence": {
                "type": "number",
                "minimum": 0.0,
                "maximum": 1.0,
                "description": "Confidence score from 0.0 to 1.0"
            },
            "explanation": {
                "type": "string",
                "description": "Brief explanation of the analysis"
            }
        },
        "required": ["is_match", "category", "keyword", "confidence", "explanation"],
        "additionalProperties": False
    },
    "strict": True
}
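# Minimal sketch (an assumption, not shown in the original gist): using the schema
# above with Structured Outputs in a synchronous call. classify_input is hypothetical.
def classify_input(user_input: str) -> dict:
    import json
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": PROMPT_GENERIC},
            {"role": "user", "content": user_input},
        ],
        response_format={"type": "json_schema", "json_schema": json_schema_generic},
    )
    # The reply content is a JSON string conforming to json_schema_generic
    return json.loads(resp.choices[0].message.content)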
import json

def generate_jsonl_data_structured(id, model, messages, response_format):
    return json.dumps({
        "custom_id": f"request-{id}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": model,
            "messages": messages,
            "response_format": response_format
        }
    })

def generate_jsonl_data(id, model, max_tokens, messages):
    return json.dumps({
        "custom_id": f"request-{id}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens
        }
    })
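# Hypothetical combination (not in the original gist): one batch line that asks for
# structured output, pairing the generic prompt with json_schema_generic.
# line = generate_jsonl_data_structured(
#     "abc-1", "gpt-4o-mini",
#     [{"role": "system", "content": PROMPT_GENERIC},
#      {"role": "user", "content": "EXAMPLE INPUT"}],
#     {"type": "json_schema", "json_schema": json_schema_generic},
# )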
import io

def create_batch_job(client, jsonl_data):
    batch_input_file = client.files.create(
        file=io.BytesIO("\n".join(jsonl_data).encode('utf-8')),
        purpose="batch"
    )
    return client.batches.create(
        input_file_id=batch_input_file.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": "nightly eval job"
        }
    )

def check_batch_job(client, batch_job_id):
    return client.batches.retrieve(batch_job_id)

def get_batch_job_results(client, output_file_id):
    return client.files.content(output_file_id)

def get_zipped_results_list(jsonl_data, results: list):
    zipped_results = []
    parsed_results = [json.loads(line) for result in results for line in result.text.strip().split('\n')]
    jsonl_parsed = [json.loads(item) for item in jsonl_data]
    data_dict = {item['custom_id']: item for item in jsonl_parsed}
    result_dict = {item['custom_id']: item for item in parsed_results}
    # Match and create zipped_results
    for custom_id in data_dict:
        if custom_id in result_dict:
            data = data_dict[custom_id]
            result = result_dict[custom_id]
            zipped_results.append({
                'custom_id': custom_id,
                'prompt': data['body']['messages'][0]['content'],
                'user': data['body']['messages'][1]['content'],
                'response': result['response']['body']['choices'][0]['message']['content']
            })
        else:
            raise ValueError(f"No matching result found for custom_id: {custom_id}")
    return zipped_results

def get_zipped_results_str(jsonl_data, results):
    zipped_results = []
    parsed_results = [json.loads(line) for line in results.text.strip().split('\n')]
    jsonl_parsed = [json.loads(item) for item in jsonl_data]
    data_dict = {item['custom_id']: item for item in jsonl_parsed}
    result_dict = {item['custom_id']: item for item in parsed_results}
    # Match and create zipped_results
    for custom_id in data_dict:
        if custom_id in result_dict:
            data = data_dict[custom_id]
            result = result_dict[custom_id]
            zipped_results.append({
                'custom_id': custom_id,
                'prompt': data['body']['messages'][0]['content'],
                'response': result['response']['body']['choices'][0]['message']['content']
            })
        else:
            raise ValueError(f"No matching result found for custom_id: {custom_id}")
    return zipped_results
############
############
############
from openai import OpenAI
from datetime import datetime

client = OpenAI(api_key='sk-proj-_XXXX')
model = "gpt-4o-mini"
max_tokens = 100

messages = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "user", "content": "What is the capital of Germany?"},
    {"role": "user", "content": "What is the capital of Spain?"},
    {"role": "user", "content": "What is the capital of Italy?"},
    {"role": "user", "content": "What is the capital of Portugal?"},
    {"role": "user", "content": "What is the capital of Switzerland?"},
    {"role": "user", "content": "What is the capital of Brazil?"},
    {"role": "user", "content": "What is the capital of Canada?"},
    {"role": "user", "content": "What is the capital of Japan?"},
]

# One single-message request per question
jsonl_data = [generate_jsonl_data(i, model, max_tokens, [msg]) for i, msg in enumerate(messages)]
batch_job = create_batch_job(client, jsonl_data)
print(batch_job)

check_job = check_batch_job(client, batch_job.id)
print(check_job)

# Re-check once the batch has completed, then fetch the output file id
check_job = check_batch_job(client, batch_job.id)
output_file_id = check_job.output_file_id

print("Batch Job Details:")
print(f"  ID: {check_job.id}")
print(f"  Created At: {datetime.fromtimestamp(int(check_job.created_at)).strftime('%Y-%m-%d %H:%M:%S')}")
print(f"  Status: {check_job.status}")
print(f"  Completed At: {datetime.fromtimestamp(int(check_job.completed_at)).strftime('%Y-%m-%d %H:%M:%S')}")
print(f"  Output File ID: {output_file_id}")

results = get_batch_job_results(client, output_file_id)

# Print the zipped results (a single results object, so use the _str variant)
for item in get_zipped_results_str(jsonl_data, results):
    print(f"Custom ID: {item['custom_id']}")
    print(f"Prompt: {item['prompt']}")
    print(f"Response: {item['response']}")
    print()
from pydantic import BaseModel
from openai import OpenAI
from dotenv import load_dotenv
import os

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

class TranslationToEnglish(BaseModel):
    text: str

def translate_to_english(text: str) -> str:
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Translate the following text to English."},
            {"role": "user", "content": text},
        ],
        response_format=TranslationToEnglish,
    )
    # Return the parsed field so the declared -> str annotation holds
    return completion.choices[0].message.parsed.text
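# Usage sketch (hypothetical input):
# print(translate_to_english("Bonjour, comment allez-vous ?"))  # e.g. "Hello, how are you?"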
from dataclasses import dataclass

# (input, output) price in USD per 1M tokens
OPENAI_PRICING = {
    "gpt-4o-mini": (0.15, 0.6),
    "gpt-4o": (2.5, 10),
    "o1-mini": (15, 60),
}

@dataclass
class UsageTracker:
    prompt_tokens: int
    completion_tokens: int
    total_price: float

    def add(self, prompt_tokens: int, completion_tokens: int, model_name: str):
        self.prompt_tokens += prompt_tokens
        self.completion_tokens += completion_tokens
        self.total_price += (
            OPENAI_PRICING[model_name][0] * prompt_tokens / 1_000_000
            + OPENAI_PRICING[model_name][1] * completion_tokens / 1_000_000
        )
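# Usage sketch (an assumption, not from the original gist): feed the tracker from
# a chat completion's usage object.
# tracker = UsageTracker(0, 0, 0.0)
# resp = client.chat.completions.create(
#     model="gpt-4o-mini",
#     messages=[{"role": "user", "content": "Hello"}],
# )
# tracker.add(resp.usage.prompt_tokens, resp.usage.completion_tokens, "gpt-4o-mini")
# print(f"Estimated spend: ${tracker.total_price:.6f}")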
from pathlib import Path

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def text_to_speech(text: str, user_key: str) -> Path:
    logger.info(f"Converting text to speech: {text[:50]}...")
    audio_filename = f"response_{user_key}.mp3"
    # audio_service is an external object providing the output directory
    audio_path = os.path.join(audio_service.audio_dir, audio_filename)
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=f"[French] {text}"
    )
    output_path = Path(audio_path)
    with open(output_path, 'wb') as f:
        for chunk in response.iter_bytes():
            f.write(chunk)
    logger.info(f"Audio file saved successfully at: {output_path}")
    return output_path
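# Hypothetical call site (assumes a configured logger, an audio_service with an
# audio_dir attribute, and a running event loop):
# path = await text_to_speech("Bonjour et bienvenue", user_key="user-1")
# print(path)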