Test Ollama with 2 shots in a row to check for reproducible output (Chat, Generate, Embed)
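Usage note (the filename is just an example): save the script as, say, ollama_reproducibility_test.py and run it with python ollama_reproducibility_test.py on the machine that hosts Ollama. It restarts the Ollama systemd service between test batches via sudo systemctl (so it needs sudo rights), pins temperature to 0 with a fixed seed, and reports which installed models return identical output across consecutive runs.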
import logging
import os
import sys
import time
import uuid
import json
import requests
import colorama
import ollama
from ollama import Client
logging.basicConfig(
    format = '%(asctime)s - %(name)-20s - %(levelname)-10s - %(message)-40s \t (%(filename)s:%(lineno)d)',
    stream = sys.stdout,
    level = logging.INFO
)
logging.getLogger("httpx").setLevel(logging.ERROR)
logger = logging.getLogger(__name__)

B_RED = colorama.Back.RED
RED = colorama.Fore.RED
BLUE = colorama.Fore.BLUE
GREEN = colorama.Fore.GREEN
YELLOW = colorama.Fore.YELLOW
MAGENTA = colorama.Fore.MAGENTA
YELLOW_LIGHT = colorama.Fore.LIGHTYELLOW_EX
RESET = colorama.Style.RESET_ALL

base_url = "http://127.0.0.1:11434"
url_generate = f"{base_url}/api/generate"
url_chat = f"{base_url}/api/chat"
url_embed = f"{base_url}/api/embed"

ollama_sync_client = Client(host=base_url)
ollama_temperature: float = 0.0
ollama_seed: int = 1234567890
single_model: str = "gemma3:12b"
prompt: str = "Hi there! My name is Marcel. Is all ok for you?" # Not short prompt | |
def convert_nanoseconds(nano: int) -> str:
    """ Convert time in nanoseconds to a human readable string (French wording) """
    seconds = nano / 1e9
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)

    def pluralize(value, singular, plural):
        return f"{int(value)} {plural}" if value != 1 else f"{int(value)} {singular}"

    if days != 0:
        formatted = f"{pluralize(days, 'jour', 'jours')}, {pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif hours != 0:
        formatted = f"{pluralize(hours, 'heure', 'heures')}, {pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    elif minutes != 0:
        formatted = f"{pluralize(minutes, 'minute', 'minutes')} et {seconds:.2f} secondes"
    else:
        formatted = f"{seconds:.2f} secondes"
    return formatted
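
## Service helpers: restart the Ollama systemd unit and wait until the HTTP API answers again (requires sudo)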
def check_service(service: str) -> bool:
    result = os.system('sudo systemctl is-active {}'.format(service))
    return result == 0

def ollama_ready_to_serve() -> bool:
    try:
        response = requests.get(base_url)
        return "Ollama is running" in response.text
    except requests.exceptions.RequestException:
        return False

def restart_ollama_service(timeout: int = 30) -> bool:
    try:
        print('Restarting Ollama Service')
        os.system("sudo systemctl restart ollama")
        # Wait until the service becomes active or timeout
        start_time = time.time()
        while not check_service('ollama'):
            if (time.time() - start_time) > timeout:
                print(f'Timeout reached: Ollama service did not become active within {timeout} seconds.')
                return False
            time.sleep(0.1)
        print('Restart Done')
        while not ollama_ready_to_serve():
            time.sleep(0.1)
        print('Ollama Ready')
        return True
    except Exception as e:
        print(f'An error occurred while restarting the Ollama service: {e}')
        return False
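
## Single-shot Chat / Generate / Embed calls through the ollama Python client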
def generate_single_input(ollama_model: str = single_model):
    var = ollama.generate(
        model = ollama_model,
        prompt = prompt,
        stream = False,
        options = {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    )
    return var.response

def chat_single_input(ollama_model: str = single_model):
    messages = [{'role': 'user', 'content': prompt}]
    var = ollama.chat(
        model = ollama_model,
        messages = messages,
        stream = False,
        options = {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    )
    return var['message'].content

def embed_single_input(ollama_model: str = single_model):
    try:
        var = ollama.embed(
            model = ollama_model,
            input = prompt,
            options = {
                'temperature': ollama_temperature,
                'seed': ollama_seed
            }
        )
        return var['embeddings']
    except Exception:
        # A random UUID makes failed runs compare as different, so they are never reported as consistent
        return f"Embedding not supported {uuid.uuid4()}"
def generate_single_input_requests(ollama_model: str = single_model):
    payload = {
        "model": ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url_generate, headers=headers, data=json.dumps(payload))
    data = response.json()
    return data["response"]

def chat_single_input_requests(ollama_model: str = single_model):
    messages = [{'role': 'user', 'content': prompt}]
    payload = {
        "model": ollama_model,
        "messages": messages,
        "stream": False,
        "options": {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    }
    headers = {"Content-Type": "application/json"}
    response = requests.post(url_chat, headers=headers, data=json.dumps(payload))
    data = response.json()
    return data['message']['content']

def embed_single_input_requests(ollama_model: str = single_model):
    payload = {
        "model": ollama_model,
        "input": prompt,  # /api/embed expects "input", not "messages"
        "options": {
            'temperature': ollama_temperature,
            'seed': ollama_seed
        }
    }
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(url_embed, headers=headers, data=json.dumps(payload))
        data = response.json()
        return data['embeddings']
    except Exception:
        # A random UUID makes failed runs compare as different, so they are never reported as consistent
        return f"Embedding not supported {uuid.uuid4()}"
def log_response_consistency(responses, method_name) -> bool:
    first_response = responses[0]
    for i, response in enumerate(responses[1:], start=2):
        if response != first_response:
            print(f"{RED}Consistency check failed for {method_name}: Response {i} does not match the first one.{RESET}")
            return False
    print(f"{GREEN}All responses are consistent for {method_name}.{RESET}")
    return True

def run_tests(ollama_model: str = single_model, iterations: int = 2) -> bool:
    start_time = time.perf_counter_ns()

    restart_ollama_service()
    chat_responses = []
    for i in range(iterations):
        print("Chat Turn", i+1)
        response = chat_single_input(ollama_model)
        chat_responses.append(response)
    chat_consistent = log_response_consistency(chat_responses, f"Ollama Chat for {ollama_model}")

    restart_ollama_service()
    chat_requests_responses = []
    for i in range(iterations):
        print(f"Chat Turn {i+1} - (Requests Method)")
        response = chat_single_input_requests(ollama_model)
        chat_requests_responses.append(response)
    chat_requests_consistent = log_response_consistency(chat_requests_responses, f"Ollama Chat (Requests Method) for {ollama_model}")

    restart_ollama_service()
    generate_responses = []
    for i in range(iterations):
        print("Generate Turn", i+1)
        response = generate_single_input(ollama_model)
        generate_responses.append(response)
    generate_consistent = log_response_consistency(generate_responses, f"Ollama Generate for {ollama_model}")

    restart_ollama_service()
    generate_requests_responses = []
    for i in range(iterations):
        print(f"Generate Turn {i+1} - (Requests Method)")
        response = generate_single_input_requests(ollama_model)
        generate_requests_responses.append(response)
    generate_requests_consistent = log_response_consistency(generate_requests_responses, f"Ollama Generate (Requests Method) for {ollama_model}")

    print(MAGENTA + convert_nanoseconds(time.perf_counter_ns() - start_time) + RESET)
    return all([chat_consistent, generate_consistent, chat_requests_consistent, generate_requests_consistent])

def run_embed_tests(ollama_model: str = single_model, iterations: int = 2) -> bool:
    start_time = time.perf_counter_ns()

    restart_ollama_service()
    embed_responses = []
    for i in range(iterations):
        print("Embed Turn", i+1)
        response = embed_single_input(ollama_model)
        embed_responses.append(response)
    embed_consistent = log_response_consistency(embed_responses, f"Ollama Embed for {ollama_model}")

    restart_ollama_service()
    embed_requests_responses = []
    for i in range(iterations):
        print(f"Embed Turn {i+1} - (Requests Method)")
        response = embed_single_input_requests(ollama_model)
        embed_requests_responses.append(response)
    embed_requests_consistent = log_response_consistency(embed_requests_responses, f"Ollama Embed (Requests Method) for {ollama_model}")

    print(MAGENTA + convert_nanoseconds(time.perf_counter_ns() - start_time) + RESET)
    return all([embed_consistent, embed_requests_consistent])
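
## Helpers to list the locally installed models and extract their names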
def get_model_name(item):
    """
    Extract the model name from an item.
    The item might be an object with a "model" attribute,
    a dict with key "model", or a tuple with the model name as the first element.
    """
    if hasattr(item, "model"):
        return item.model
    elif isinstance(item, dict):
        return item.get("model")
    elif isinstance(item, tuple):
        return item[0]
    return None

def get_full_model_list(client: Client = ollama_sync_client) -> list:
    """
    Return the full list of locally available model names, or None if nothing matches
    """
    logger.info('[FUNC] List Full Model List')
    list_response = client.list()
    models = list_response.models
    # Keep only models whose name is entirely lowercase (name == name.lower())
    filtered_models = [
        item for item in models
        if get_model_name(item) in get_model_name(item).lower()
    ]
    if not filtered_models:
        logger.error("No suitable models available.")
        return None
    else:
        # Sort the filtered models by model name in ascending order.
        filtered_models = sorted(filtered_models, key=lambda item: get_model_name(item), reverse=False)
        available_model_names = [get_model_name(item) for item in filtered_models]
        return available_model_names

## Get Models
logger.info('Get Full Model List')
full_model_list = get_full_model_list()
if full_model_list is None:
    logger.error('Model List is empty.')
    sys.exit(1)
logger.info('Done.')

# Skip models whose names contain any of these substrings
excluding_string_list = [
    "embed",
    "impactframes",
    "code",
]
filtered_models = [model for model in full_model_list if not any(s in model for s in excluding_string_list)]

## Loop Test
consistent_models = []
inconsistent_models = []
embed_consistent_models = []
embed_inconsistent_models = []

start_duration = time.perf_counter_ns()
for model in filtered_models:
    print('\n' + YELLOW + model + RESET)
    if run_tests(model):
        consistent_models.append(model)
    else:
        inconsistent_models.append(model)
    if run_embed_tests(model):
        embed_consistent_models.append(model)
    else:
        embed_inconsistent_models.append(model)
stop_duration = time.perf_counter_ns()
total_duration = convert_nanoseconds(stop_duration - start_duration)
print(MAGENTA + '\nTotal Loop Duration: ' + total_duration + RESET)

print("=" * 120 + "\nConsistent Models:")
for model in consistent_models:
    print(GREEN + model + RESET)
print("\nInconsistent Models:")
for model in inconsistent_models:
    print(RED + model + RESET)
print('\n\n')

print("=" * 120 + "\nEMBEDDING - Consistent Models:")
for model in embed_consistent_models:
    print(GREEN + model + RESET)
print("\nEMBEDDING - Inconsistent Models:")
if not embed_inconsistent_models:
    print(' None')
else:
    for model in embed_inconsistent_models:
        print(RED + model + RESET)

sys.exit(0)