@lemassykoi · Last active March 27, 2025
Test Ollama with 2 shots in a row to check for reproducible output (Chat, Generate, Embed)
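Running this gist assumes a local Ollama server on its default port and passwordless sudo for `systemctl` (the script restarts the service between test batches). A minimal setup, with package names taken from the imports and an illustrative filename:

# pip install ollama requests colorama
# python3 test_ollama_reproducibility.py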
import logging
import os
import sys
import time
import json
import uuid

import requests
import colorama
import ollama
from ollama import Client

logging.basicConfig(
    format = '%(asctime)s - %(name)-20s - %(levelname)-10s - %(message)-40s \t (%(filename)s:%(lineno)d)',
    stream = sys.stdout,
    level  = logging.INFO
)
logging.getLogger("httpx").setLevel(logging.ERROR)
logger = logging.getLogger(__name__)
B_RED = colorama.Back.RED
RED = colorama.Fore.RED
BLUE = colorama.Fore.BLUE
GREEN = colorama.Fore.GREEN
YELLOW = colorama.Fore.YELLOW
MAGENTA = colorama.Fore.MAGENTA
YELLOW_LIGHT = colorama.Fore.LIGHTYELLOW_EX
RESET = colorama.Style.RESET_ALL
base_url = "http://127.0.0.1:11434"
url_generate = f"{base_url}/api/generate"
url_chat = f"{base_url}/api/chat"
url_embed = f"{base_url}/api/embed"
ollama_sync_client = Client(host=base_url)
ollama_temperature: float = 0.0
ollama_seed: int = 1234567890
single_model: str = "gemma3:12b"
prompt: str = "Hi there! My name is Marcel. Is all ok for you?"  # deliberately not a trivial one-word prompt
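For context, a minimal sketch of what every test below boils down to: two calls with identical options, then an equality check. `ollama.generate` and the `options` keys are the real client API; `r1`/`r2` are illustrative names.

# r1 = ollama.generate(model=single_model, prompt=prompt, options={'temperature': ollama_temperature, 'seed': ollama_seed}).response
# r2 = ollama.generate(model=single_model, prompt=prompt, options={'temperature': ollama_temperature, 'seed': ollama_seed}).response
# assert r1 == r2  # holds only if the backend is deterministic for this model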
def convert_nanoseconds(nano: int) -> str:
    """ Convert a duration in nanoseconds to a human-readable string (in French). """
    seconds = nano / 1e9
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    def pluralize(value, singular, plural):
        return f"{int(value)} {plural}" if value != 1 else f"{int(value)} {singular}"
    if days != 0:
        formatted = f"{pluralize(days, 'jour', 'jours')}, {pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif hours != 0:
        formatted = f"{pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif minutes != 0:
        formatted = f"{pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    else:
        formatted = f"{seconds:.2f} secondes"
    return formatted
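A doctest-style illustration of the expected output (the input value is chosen here for the example):

# >>> convert_nanoseconds(90_500_000_000)
# '1 minute et 30.50 secondes'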
def check_service(service: str) -> bool:
    result = os.system(f'sudo systemctl is-active {service}')
    return result == 0

def ollama_ready_to_serve() -> bool:
    try:
        response = requests.get(base_url)
        return "Ollama is running" in response.text
    except requests.exceptions.RequestException:
        return False
def restart_ollama_service(timeout: int = 30) -> bool:
    try:
        print('Restarting Ollama Service')
        os.system("sudo systemctl restart ollama")
        # Wait until the service becomes active, or give up after `timeout` seconds
        start_time = time.time()
        while not check_service('ollama'):
            if (time.time() - start_time) > timeout:
                print(f'Timeout reached: Ollama service did not become active within {timeout} seconds.')
                return False
            time.sleep(0.1)
        print('Restart Done')
        while not ollama_ready_to_serve():
            time.sleep(0.1)
        print('Ollama Ready')
        return True
    except Exception as e:
        print(f'An error occurred while restarting the Ollama service: {e}')
        return False
def generate_single_input(ollama_model: str = single_model):
    var = ollama.generate(
        model   = ollama_model,
        prompt  = prompt,
        stream  = False,
        options = {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    )
    return var.response
def chat_single_input(ollama_model: str = single_model):
    messages = [{'role': 'user', 'content': prompt}]
    var = ollama.chat(
        model    = ollama_model,
        messages = messages,
        stream   = False,
        options  = {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    )
    return var.message.content
def embed_single_input(ollama_model: str = single_model):
    try:
        var = ollama.embed(
            model   = ollama_model,
            input   = prompt,
            options = {
                'temperature': ollama_temperature,
                'seed': ollama_seed
            }
        )
        return var['embeddings']
    except Exception:
        # A random UUID guarantees the consistency check fails,
        # which flags models that do not support embeddings.
        return f"Embedding not supported {uuid.uuid4()}"
def generate_single_input_requests(ollama_model: str = single_model):
    payload = {
        "model": ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url_generate, headers=headers, data=json.dumps(payload))
    data = response.json()
    return data["response"]
def chat_single_input_requests(ollama_model: str = single_model):
    messages = [{'role': 'user', 'content': prompt}]
    payload = {
        "model": ollama_model,
        "messages": messages,
        "stream": False,
        "options": {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url_chat, headers=headers, data=json.dumps(payload))
    data = response.json()
    return data['message']['content']
def embed_single_input_requests(ollama_model: str = single_model):
    payload = {
        "model": ollama_model,
        "input": prompt,  # /api/embed expects "input", not "messages"
        "options": {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    }
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(url_embed, headers=headers, data=json.dumps(payload))
        data = response.json()
        return data['embeddings']
    except Exception:
        return f"Embedding not supported {uuid.uuid4()}"
def log_response_consistency(responses, method_name) -> bool:
    first_response = responses[0]
    for i, response in enumerate(responses[1:], start=2):
        if response != first_response:
            print(f"{RED}Consistency check failed for {method_name}: Response {i} does not match the first one.{RESET}")
            return False
    print(f"{GREEN}All responses are consistent for {method_name}.{RESET}")
    return True
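Illustrative calls (values invented for this example): identical entries pass; any divergence from the first response fails.

# >>> log_response_consistency(['same', 'same'], 'demo')  # green message, returns True
# >>> log_response_consistency(['same', 'diff'], 'demo')  # red message, returns False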
def run_tests(ollama_model: str = single_model, iterations: int = 2) -> bool:
    start_time = time.perf_counter_ns()
    restart_ollama_service()
    chat_responses = []
    for i in range(iterations):
        print("Chat Turn", i + 1)
        response = chat_single_input(ollama_model)
        chat_responses.append(response)
    chat_consistent = log_response_consistency(chat_responses, f"Ollama Chat for {ollama_model}")
    restart_ollama_service()
    chat_requests_responses = []
    for i in range(iterations):
        print(f"Chat Turn {i+1} - (Requests Method)")
        response = chat_single_input_requests(ollama_model)
        chat_requests_responses.append(response)
    chat_requests_consistent = log_response_consistency(chat_requests_responses, f"Ollama Chat (Requests Method) for {ollama_model}")
    restart_ollama_service()
    generate_responses = []
    for i in range(iterations):
        print("Generate Turn", i + 1)
        response = generate_single_input(ollama_model)
        generate_responses.append(response)
    generate_consistent = log_response_consistency(generate_responses, f"Ollama Generate for {ollama_model}")
    restart_ollama_service()
    generate_requests_responses = []
    for i in range(iterations):
        print(f"Generate Turn {i+1} - (Requests Method)")
        response = generate_single_input_requests(ollama_model)
        generate_requests_responses.append(response)
    generate_requests_consistent = log_response_consistency(generate_requests_responses, f"Ollama Generate (Requests Method) for {ollama_model}")
    print(MAGENTA + convert_nanoseconds(time.perf_counter_ns() - start_time) + RESET)
    return all([chat_consistent, generate_consistent, chat_requests_consistent, generate_requests_consistent])
def run_embed_tests(ollama_model: str = single_model, iterations: int = 2) -> bool:
    start_time = time.perf_counter_ns()
    restart_ollama_service()
    embed_responses = []
    for i in range(iterations):
        print("Embed Turn", i + 1)
        response = embed_single_input(ollama_model)
        embed_responses.append(response)
    embed_consistent = log_response_consistency(embed_responses, f"Ollama Embed for {ollama_model}")
    restart_ollama_service()
    embed_requests_responses = []
    for i in range(iterations):
        print(f"Embed Turn {i+1} - (Requests Method)")
        response = embed_single_input_requests(ollama_model)
        embed_requests_responses.append(response)
    embed_requests_consistent = log_response_consistency(embed_requests_responses, f"Ollama Embed (Requests Method) for {ollama_model}")
    print(MAGENTA + convert_nanoseconds(time.perf_counter_ns() - start_time) + RESET)
    return all([embed_consistent, embed_requests_consistent])
def get_model_name(item):
    """
    Extract the model name from an item.
    The item might be an object with a "model" attribute,
    a dict with key "model", or a tuple with the model name as the first element.
    """
    if hasattr(item, "model"):
        return item.model
    elif isinstance(item, dict):
        return item.get("model")
    elif isinstance(item, tuple):
        return item[0]
    return None
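For illustration (item shapes invented here), each of these returns 'gemma3:12b':

# >>> get_model_name({'model': 'gemma3:12b'})
# >>> get_model_name(('gemma3:12b', '8.1GB'))
# ...and likewise for any object exposing a `.model` attribute.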
def get_full_model_list(client: Client = ollama_sync_client) -> list | None:
    """
    Return the full list of locally available model names, sorted in ascending order.
    """
    logger.info('[FUNC] List Full Model List')
    list_response = client.list()
    models = list_response.models
    # Keep only models whose names are entirely lowercase
    # (equivalent to the original substring test `name in name.lower()`)
    filtered_models = [
        item for item in models
        if get_model_name(item) == get_model_name(item).lower()
    ]
    if not filtered_models:
        logger.error("No suitable models available.")
        return None
    # Sort the filtered models by model name in ascending order
    filtered_models = sorted(filtered_models, key=lambda item: get_model_name(item))
    return [get_model_name(item) for item in filtered_models]
## Get Models
logger.info('Get Full Model List')
full_model_list = get_full_model_list()
if full_model_list is None:
    logger.error('Model List is empty.')
    sys.exit(1)
logger.info('Done.')
excluding_string_list = [
    "embed",
    "impactframes",
    "code",
]
filtered_models = [model for model in full_model_list if not any(s in model for s in excluding_string_list)]
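As an illustration of the filter (model names invented here), 'nomic-embed-text' and 'qwen2.5-coder' are dropped while 'gemma3:12b' survives:

# >>> [m for m in ['gemma3:12b', 'nomic-embed-text', 'qwen2.5-coder'] if not any(s in m for s in excluding_string_list)]
# ['gemma3:12b']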
## Loop Test
consistent_models = []
inconsistent_models = []
embed_consistent_models = []
embed_inconsistent_models = []
start_duration = time.perf_counter_ns()
for model in filtered_models:
    print('\n' + YELLOW + model + RESET)
    if run_tests(model):
        consistent_models.append(model)
    else:
        inconsistent_models.append(model)
    if run_embed_tests(model):
        embed_consistent_models.append(model)
    else:
        embed_inconsistent_models.append(model)
stop_duration = time.perf_counter_ns()
total_duration = convert_nanoseconds(stop_duration - start_duration)
print(MAGENTA + '\nTotal Loop Duration: ' + total_duration + RESET)
print("=" * 120 + "\nConsistent Models:")
for model in consistent_models:
print(GREEN + model + RESET)
print("\nInconsistent Models:")
for model in inconsistent_models:
print(RED + model + RESET)
print('\n\n')
print("=" * 120 + "\nEMBEDDING - Consistent Models:")
for model in embed_consistent_models:
print(GREEN + model + RESET)
print("\nEMBEDDING - Inconsistent Models:")
if embed_inconsistent_models == []:
print(' None')
else:
for model in embed_inconsistent_models:
print(RED + model + RESET)
exit(0)