Test Ollama with 2 shots in a row to check for reproducible output
import ollama
import colorama
import os
import time
import requests
import json

# ANSI colour shortcuts (colorama)
B_RED = colorama.Back.RED
RED = colorama.Fore.RED
BLUE = colorama.Fore.BLUE
GREEN = colorama.Fore.GREEN
YELLOW = colorama.Fore.YELLOW
MAGENTA = colorama.Fore.MAGENTA
YELLOW_LIGHT = colorama.Fore.LIGHTYELLOW_EX
RESET = colorama.Style.RESET_ALL

# Local Ollama endpoints
url = "http://127.0.0.1:11434"
url_generate = f"{url}/api/generate"
url_chat = f"{url}/api/chat"

# Fixed seed and zero temperature, so repeated runs should return identical output
ollama_temperature: float = 0.0
ollama_seed: int = 1234567890
single_model: str = "gemma3:12b"
model_list: list = [
    "gemma2:latest",
    "gemma3:12b",
    "aya:latest",
    "aya-expanse:8b-q8_0",
    "qwen2.5:7b-instruct-q8_0",
    "mistral-nemo:latest",
    "mistral:7b-instruct-v0.3-q8_0",
    "mistral-small:latest",
    "phi4:latest",
    "phi4-mini:3.8b-fp16",
    "llama3.2:latest",
    "llama3.3:latest",
    "llama3-groq-tool-use:8b-q8_0",
    "llama3-groq-tool-use:8b-fp16",
]
prompt = "Hi there! My name is Marcel. Is all ok for you?"

def convert_nanoseconds(nano: int) -> str:
    """Convert a duration in nanoseconds to a human-readable string (French wording)."""
    seconds = nano / 1e9
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)

    def pluralize(value, singular, plural):
        return f"{int(value)} {plural}" if value != 1 else f"{int(value)} {singular}"

    if days != 0:
        formatted = f"{pluralize(days, 'jour', 'jours')}, {pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif hours != 0:
        formatted = f"{pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif minutes != 0:
        formatted = f"{pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    else:
        formatted = f"{seconds:.2f} secondes"
    return formatted
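
# Worked example of the conversion above (value chosen for illustration):
#   convert_nanoseconds(95_000_000_000)  ->  "1 minute et 35.00 secondes"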

def check_service(service: str) -> bool:
    """Return True when the systemd unit reports active (systemctl exit code 0)."""
    result = os.system(f'systemctl is-active {service}')
    return result == 0

def ollama_ready_to_serve() -> bool:
    """Check whether the Ollama HTTP endpoint is answering requests."""
    try:
        response = requests.get(url)
        return "Ollama is running" in response.text
    except requests.exceptions.RequestException:
        return False

def restart_ollama_service(timeout: int = 30) -> bool:
    try:
        print('Restarting Ollama Service')
        os.system("sudo systemctl restart ollama")
        # Wait until the service becomes active or the timeout is reached
        start_time = time.time()
        while not check_service('ollama'):
            if (time.time() - start_time) > timeout:
                print(f'Timeout reached: Ollama service did not become active within {timeout} seconds.')
                return False
            time.sleep(0.1)
        print('Restart Done')
        # The unit can be active before the HTTP API accepts requests, so poll the endpoint as well
        while not ollama_ready_to_serve():
            time.sleep(0.1)
        print('Ollama Ready')
        return True
    except Exception as e:
        print(f'An error occurred while restarting the Ollama service: {e}')
        return False

def generate_single_input(ollama_model: str = single_model):
    """Query /api/generate through the ollama Python client."""
    var = ollama.generate(
        model=ollama_model,
        prompt=prompt,
        stream=False,
        options={
            'seed': ollama_seed,
            'temperature': ollama_temperature,
        }
    )
    # subscript access works with both older dict responses and newer response objects from the ollama client
    return var['response']

def chat_single_input(ollama_model: str = single_model):
    """Query /api/chat through the ollama Python client."""
    messages = [{'role': 'user', 'content': prompt}]
    var = ollama.chat(model=ollama_model, messages=messages, stream=False, options={'temperature': ollama_temperature, 'seed': ollama_seed})
    return var['message']['content']

def generate_single_input_requests(ollama_model: str = single_model):
    """Query /api/generate directly over HTTP with requests."""
    payload = {
        "model": ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {'temperature': ollama_temperature, 'seed': ollama_seed}
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url_generate, headers=headers, data=json.dumps(payload))
    data = response.json()
    return data["response"]

def chat_single_input_requests(ollama_model: str = single_model):
    """Query /api/chat directly over HTTP with requests."""
    messages = [{'role': 'user', 'content': prompt}]
    payload = {
        "model": ollama_model,
        "messages": messages,
        "stream": False,
        "options": {'temperature': ollama_temperature, 'seed': ollama_seed}
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url_chat, headers=headers, data=json.dumps(payload))
    data = response.json()
    return data['message']['content']
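
# For reference, with the defaults above the JSON body sent to /api/generate looks like:
# {
#   "model": "gemma3:12b",
#   "prompt": "Hi there! My name is Marcel. Is all ok for you?",
#   "stream": false,
#   "options": {"temperature": 0.0, "seed": 1234567890}
# }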

def log_response_consistency(responses, method_name) -> bool:
    """Compare every response against the first one and report whether they all match."""
    first_response = responses[0]
    for i, response in enumerate(responses[1:], start=2):
        if response != first_response:
            print(f"{RED}Consistency check failed for {method_name}: Response {i} does not match the first one.{RESET}")
            return False
    print(f"{GREEN}All responses are consistent for {method_name}.{RESET}")
    return True

def run_tests(ollama_model: str = single_model, n: int = 3) -> bool:
    """Run n identical requests per method and return True when all four methods are reproducible."""
    # Restart Ollama before each batch so every method starts from a freshly loaded server
    restart_ollama_service()
    chat_responses = []
    for i in range(n):
        print("Chat Turn", i + 1)
        response = chat_single_input(ollama_model)
        chat_responses.append(response)
    chat_consistent = log_response_consistency(chat_responses, f"Ollama Chat for {ollama_model}")

    restart_ollama_service()
    generate_responses = []
    for i in range(n):
        print("Generate Turn", i + 1)
        response = generate_single_input(ollama_model)
        generate_responses.append(response)
    generate_consistent = log_response_consistency(generate_responses, f"Ollama Generate for {ollama_model}")

    restart_ollama_service()
    generate_requests_responses = []
    for i in range(n):
        print(f"Generate Turn {i+1} - (Requests Method)")
        response = generate_single_input_requests(ollama_model)
        generate_requests_responses.append(response)
    generate_requests_consistent = log_response_consistency(generate_requests_responses, f"Ollama Generate (Requests Method) for {ollama_model}")

    restart_ollama_service()
    chat_requests_responses = []
    for i in range(n):
        print(f"Chat Turn {i+1} - (Requests Method)")
        response = chat_single_input_requests(ollama_model)
        chat_requests_responses.append(response)
    chat_requests_consistent = log_response_consistency(chat_requests_responses, f"Ollama Chat (Requests Method) for {ollama_model}")

    return all([chat_consistent, generate_consistent, generate_requests_consistent, chat_requests_consistent])

## Loop Test
consistent_models = []
inconsistent_models = []
start_duration = time.perf_counter_ns()
for model in model_list:
    print(YELLOW + model + RESET)
    if run_tests(model):
        consistent_models.append(model)
    else:
        inconsistent_models.append(model)
stop_duration = time.perf_counter_ns()
total_duration = convert_nanoseconds(stop_duration - start_duration)
print(MAGENTA + '\nTotal Loop Duration: ' + total_duration + RESET)

print("=" * 120 + "\nConsistent Models:")
for model in consistent_models:
    print(GREEN + model + RESET)
print("\nInconsistent Models:")
for model in inconsistent_models:
    print(RED + model + RESET)
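
## Optional single-model spot check (not part of the original loop; uncomment to use).
## "gemma3:12b" and n=2 are illustrative values only.
# if run_tests("gemma3:12b", n=2):
#     print(GREEN + "gemma3:12b is reproducible" + RESET)
# else:
#     print(RED + "gemma3:12b is NOT reproducible" + RESET)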