Skip to content

Instantly share code, notes, and snippets.

@lemassykoi
Last active March 16, 2025 16:49
Show Gist options
  • Save lemassykoi/e1423068d1d976961953d86609877fd5 to your computer and use it in GitHub Desktop.
Save lemassykoi/e1423068d1d976961953d86609877fd5 to your computer and use it in GitHub Desktop.
Test Ollama with several identical prompts in a row (default: 3 per method) to check for reproducible output
import ollama
import colorama
import os
import time
import requests
import json
# ANSI color shortcuts used for console output throughout the script.
B_RED = colorama.Back.RED
RED = colorama.Fore.RED
BLUE = colorama.Fore.BLUE
GREEN = colorama.Fore.GREEN
YELLOW = colorama.Fore.YELLOW
MAGENTA = colorama.Fore.MAGENTA
YELLOW_LIGHT = colorama.Fore.LIGHTYELLOW_EX
RESET = colorama.Style.RESET_ALL
# Local Ollama HTTP endpoints (default port 11434).
url = "http://127.0.0.1:11434"
url_generate = f"{url}/api/generate"
url_chat = f"{url}/api/chat"
# Fixed sampling options: temperature 0 + a fixed seed, so repeated runs
# of the same prompt should produce identical output.
ollama_temperature: float = 0.0
ollama_seed: int = 1234567890
# Model used when a function is called without an explicit model argument.
single_model: str = "gemma3:12b"
# Models exercised by the loop test at the bottom of the script.
model_list: list = [
"gemma2:latest",
"gemma3:12b",
"aya:latest",
"aya-expanse:8b-q8_0",
"qwen2.5:7b-instruct-q8_0",
"mistral-nemo:latest",
"mistral:7b-instruct-v0.3-q8_0",
"mistral-small:latest",
"phi4:latest",
"phi4-mini:3.8b-fp16",
"llama3.2:latest",
"llama3.3:latest",
"llama3-groq-tool-use:8b-q8_0",
"llama3-groq-tool-use:8b-fp16",
]
# The single prompt sent to every model by every request method.
prompt = "Hi there! My name is Marcel. Is all ok for you?"
def convert_nanoseconds(nano: int) -> str:
    """Convert a duration in nanoseconds to a human-readable French string.

    Args:
        nano: Duration in nanoseconds (e.g. from ``time.perf_counter_ns``).

    Returns:
        A string such as ``"1 jour, 2 heures, 3 minutes et 4.00 secondes"``,
        omitting leading units that are zero.
    """
    seconds = nano / 1e9
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)

    def pluralize(value, singular, plural):
        # French grammar: the plural applies only from 2 upwards, so
        # 0 and 1 both take the singular form ("0 jour", "1 jour").
        return f"{int(value)} {plural}" if value >= 2 else f"{int(value)} {singular}"

    if days != 0:
        formatted = f"{pluralize(days, 'jour', 'jours')}, {pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif hours != 0:
        formatted = f"{pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif minutes != 0:
        formatted = f"{pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    else:
        formatted = f"{seconds:.2f} secondes"
    return formatted
def check_service(service: str) -> bool:
    """Return True when the given systemd unit reports as active.

    Shells out to ``systemctl is-active``; a zero exit status means active.
    """
    exit_status = os.system(f'systemctl is-active {service}')
    return exit_status == 0
def ollama_ready_to_serve() -> bool:
    """Return True once the Ollama HTTP endpoint answers with its banner.

    Probes the base URL and looks for the literal "Ollama is running"
    response body. Any connection failure or timeout counts as not ready.
    """
    try:
        # Explicit timeout: without it requests.get can block indefinitely
        # if the port is open but the server never responds.
        response = requests.get(url, timeout=5)
        return "Ollama is running" in response.text
    except requests.exceptions.RequestException:
        return False
def restart_ollama_service(timeout: int = 30) -> bool:
    """Restart the Ollama systemd service and wait until it can serve requests.

    Args:
        timeout: Maximum seconds to wait, covering both the systemd unit
            becoming active and the HTTP endpoint answering.

    Returns:
        True once the service is active and the HTTP endpoint is ready,
        False on timeout or any unexpected error.
    """
    try:
        print('Restarting Ollama Service')
        os.system("sudo systemctl restart ollama")
        # Single deadline shared by both wait phases below.
        deadline = time.time() + timeout
        # Phase 1: wait for systemd to report the unit as active.
        while not check_service('ollama'):
            if time.time() > deadline:
                print(f'Timeout reached: Ollama service did not become active within {timeout} seconds.')
                return False
            time.sleep(0.1)
        print('Restart Done')
        # Phase 2: wait for the HTTP endpoint. The original looped here
        # without any timeout and could spin forever; reuse the deadline.
        while not ollama_ready_to_serve():
            if time.time() > deadline:
                print(f'Timeout reached: Ollama service did not become active within {timeout} seconds.')
                return False
            time.sleep(0.1)
        print('Ollama Ready')
        return True
    except Exception as e:
        # Broad catch is deliberate: a failed restart should be reported,
        # not crash the whole multi-model test loop.
        print(f'An error occurred while restarting the Ollama service: {e}')
        return False
def generate_single_input(ollama_model: str = single_model):
    """Run the shared prompt through ``ollama.generate`` and return the text.

    Uses the module-wide fixed seed and zero temperature so repeated calls
    should yield identical output.
    """
    sampling_options = {
        'seed': ollama_seed,
        'temperature': ollama_temperature,
    }
    result = ollama.generate(
        model=ollama_model,
        prompt=prompt,
        stream=False,
        options=sampling_options,
    )
    return result.response
def chat_single_input(ollama_model: str = single_model):
    """Send the shared prompt via ``ollama.chat`` and return the reply text.

    Uses the module-wide fixed seed and zero temperature for reproducibility.
    """
    history = [{'role': 'user', 'content': prompt}]
    reply = ollama.chat(
        model=ollama_model,
        messages=history,
        stream=False,
        options={'temperature': ollama_temperature, 'seed': ollama_seed},
    )
    return reply['message'].content
def generate_single_input_requests(ollama_model: str = single_model):
    """Call the Ollama /api/generate endpoint directly over HTTP.

    Same prompt and sampling options as ``generate_single_input``, but via
    raw ``requests`` rather than the ollama client library.

    Returns:
        The generated text from the "response" field.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    payload = {
        "model": ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {'temperature': ollama_temperature, 'seed': ollama_seed}
    }
    # json= sets the Content-Type header and serializes the payload for us;
    # timeout prevents an unresponsive server from hanging the test loop.
    response = requests.post(url_generate, json=payload, timeout=600)
    # Fail loudly on HTTP errors instead of a confusing KeyError below.
    response.raise_for_status()
    data = response.json()
    return data["response"]
def chat_single_input_requests(ollama_model: str = single_model):
    """Call the Ollama /api/chat endpoint directly over HTTP.

    Same prompt and sampling options as ``chat_single_input``, but via raw
    ``requests`` rather than the ollama client library.

    Returns:
        The assistant reply text from ``message.content``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    messages = [{'role': 'user', 'content': prompt}]
    payload = {
        "model": ollama_model,
        "messages": messages,
        "stream": False,
        "options": {'temperature': ollama_temperature, 'seed': ollama_seed}
    }
    # json= sets the Content-Type header and serializes the payload for us;
    # timeout prevents an unresponsive server from hanging the test loop.
    response = requests.post(url_chat, json=payload, timeout=600)
    # Fail loudly on HTTP errors instead of a confusing KeyError below.
    response.raise_for_status()
    data = response.json()
    return data['message']['content']
def log_response_consistency(responses, method_name) -> bool:
    """Check that every collected response is identical to the first one.

    Args:
        responses: List of response strings from repeated identical calls.
        method_name: Human-readable label used in the printed verdict.

    Returns:
        True when all responses match (and the list is non-empty),
        False otherwise.
    """
    # Guard: the original indexed responses[0] and crashed on an empty list
    # (e.g. when every request in a round failed).
    if not responses:
        print(f"{RED}Consistency check failed for {method_name}: no responses collected.{RESET}")
        return False
    first_response = responses[0]
    # start=2 so the printed index matches the human 1-based "turn" number.
    for i, response in enumerate(responses[1:], start=2):
        if response != first_response:
            print(f"{RED}Consistency check failed for {method_name}: Response {i} does not match the first one.{RESET}")
            return False
    print(f"{GREEN}All responses are consistent for {method_name}.{RESET}")
    return True
def _consistency_round(call, ollama_model: str, n: int, turn_fmt: str, method_name: str) -> bool:
    """Restart Ollama, invoke `call` n times, and report response consistency."""
    restart_ollama_service()
    responses = []
    for turn in range(1, n + 1):
        print(turn_fmt.format(turn))
        responses.append(call(ollama_model))
    return log_response_consistency(responses, method_name)


def run_tests(ollama_model: str = single_model, n: int = 3) -> bool:
    """Run all four request methods n times each and check reproducibility.

    The service is restarted before each round so every method starts from
    a cold state.

    Args:
        ollama_model: Model tag to test.
        n: Number of identical calls per method.

    Returns:
        True only if every method produced n identical responses.
    """
    rounds = [
        (chat_single_input, "Chat Turn {}",
         f"Ollama Chat for {ollama_model}"),
        (generate_single_input, "Generate Turn {}",
         f"Ollama Generate for {ollama_model}"),
        (generate_single_input_requests, "Generate Turn {} - (Requests Method)",
         f"Ollama Generate (Requests Method) for {ollama_model}"),
        (chat_single_input_requests, "Chat Turn {} - (Requests Method)",
         f"Ollama Chat (Requests Method) for {ollama_model}"),
    ]
    # List comprehension (not a generator) so every round runs even after
    # an early failure, matching the original's behavior.
    results = [
        _consistency_round(call, ollama_model, n, turn_fmt, method_name)
        for call, turn_fmt, method_name in rounds
    ]
    return all(results)
## Loop Test
def main() -> None:
    """Run the consistency suite for every model and print a color summary."""
    consistent_models = []
    inconsistent_models = []
    start_duration = time.perf_counter_ns()
    for model in model_list:
        print(YELLOW + model + RESET)
        if run_tests(model):
            consistent_models.append(model)
        else:
            inconsistent_models.append(model)
    stop_duration = time.perf_counter_ns()
    total_duration = convert_nanoseconds(stop_duration - start_duration)
    print(MAGENTA + '\nTotal Loop Duration: ' + total_duration + RESET)
    print("=" * 120 + "\nConsistent Models:")
    for model in consistent_models:
        print(GREEN + model + RESET)
    print("\nInconsistent Models:")
    for model in inconsistent_models:
        print(RED + model + RESET)


# Guard so importing this module for its helpers doesn't launch the
# (long-running) full test loop.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment