Created
August 19, 2025 06:04
-
-
Save OrsoEric/dd060bcdce853ca3db266203138d6cea to your computer and use it in GitHub Desktop.
generate_benchmark_chart.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Take all the scorecards in the chart folder | 
Generate various charts and tables comparing models against each other | |
this script works on the folder "output_chart" | |
The user manually puts any number of aggregate JSON score cards generated by "execute_benchmark" on a range of models, machines, benchmarks and runtimes | 
This script takes care of generating useful tables and charts from that data | |
CHARTS | |
show the model accuracy against the size on disk | |
Y accuracy | |
X size GB | |
Y speed T/s | |
X size GB | |
Y speed C/s | |
X size GB | |
show the model speed against the accuracy | |
Y accuracy | |
X speed C/s | |
Y accuracy | |
X speed T/s | |
Performance with context against accuracy, it's a polygon | |
Y accuracy structure, match, accuracy | |
X context T | |
Y accuracy structure, match, accuracy | |
X context C | |
TABLE | |
model | |
compression | |
QUANT | |
I want to see the performance across quantization | |
""" | |
import logging | |
import json | |
from os import path, listdir | |
from typing import List, Dict | |
import matplotlib.pyplot as plt | |
from matplotlib import ticker | |
#benchmark scorecard structure, the same used by the benchmark executon script | |
from st_score_card import St_score_card | |
def load_model_sizes(json_file_path):
    """
    Read the model-size index JSON and build a lookup dictionary.
    Each entry maps a model file name (extension stripped, lower case)
    to its size on disk in GB.
    Parameters:
        json_file_path (str): path of the JSON index file.
    Returns:
        Dict[str, float]: model name -> size in GB.
    """
    with open(json_file_path, 'r', encoding='utf-8') as file:
        l_entries = json.load(file)
    d_model_sizes = dict()
    for d_entry in l_entries:
        #strip the extension and normalize the case so later substring lookups work
        s_key = path.splitext(d_entry["s_filename"])[0].lower()
        d_model_sizes[s_key] = d_entry["n_size_gb"]
    return d_model_sizes
def get_files_in_directory(i_s_folder_path: str, s_extension : str = ".json") -> List[str]:
    """
    This function takes a directory path as input and returns a list containing
    paths of all files within the directory that end with the given extension.
    Parameters:
        i_s_folder_path (str): The path to the folder where the search is conducted.
        s_extension (str): File extension to match, including the dot (default ".json").
    Returns:
        List[str]: A list of strings, each representing the full path of a matching file in the specified folder.
    """
    ln_json_files = []
    for st_file_name in listdir(i_s_folder_path):
        #keep only the files with the requested extension
        if st_file_name.endswith(s_extension):
            st_full_file_path = path.join(i_s_folder_path, st_file_name)
            ln_json_files.append(st_full_file_path)
    return ln_json_files
class St_chart_row:
    """
    Defines one row (one bar or dot) in a chart.
    X is the bar position, Y is the bar center; the standard deviations
    control the bar width and height drawn by the plot generators.
    """
    def __init__(self):
        #X position and its spread (used as bar width)
        self.n_axis_x : float = 0.0
        self.n_axis_x_std : float = 0.0
        #Y bar center and its spread (used as bar height)
        self.n_axis_y_avg : float = 0.0
        self.n_axis_y_std : float = 0.0
        #decorators: matplotlib color name and short text label drawn on the chart
        self.s_color : str = ""
        self.s_label : str = ""

    def __repr__(self):
        return f"X: {self.n_axis_x} | Yavg: {self.n_axis_y_avg} | Ystd: {self.n_axis_y_std} | Color: {self.s_color}"
class Cl_generate_chart():
    """
    Loads aggregate benchmark scorecards from the "output_chart" folder and
    renders comparison charts (model size, speed, accuracy) as PNG files
    inside the same folder.
    All public methods return False on success (project convention).
    """
    def __init__(self):
        #scorecards loaded by load_scorecard()
        self.gst_scorecard : List[St_score_card] = list()
        return

    def load_scorecard(self) -> bool:
        """
        Load every aggregate scorecard JSON found in "output_chart".
        If an optional "model_size.json" index is present, attach the size on
        disk in GB to each scorecard whose model name and quantization both
        appear (as substrings) in one of the index keys.
        Returns: False on success.
        """
        # LOAD all JSON
        s_path_json = path.join( "output_chart" )
        ls_file_json = get_files_in_directory( s_path_json )
        logging.debug(f"JSON files {len(ls_file_json)}: {ls_file_json}")
        s_path_model_size = path.join( "output_chart", "model_size.json" )
        #If I have a json listing all the model sizes
        if s_path_model_size in ls_file_json:
            # LOAD model size
            d_model_size = load_model_sizes(s_path_model_size)
            logging.info(f"Loaded size from {len(d_model_size)} models: {d_model_size}")
        else:
            #sizes are optional; without them the size-based charts lack X data
            d_model_size = dict()
        #load only the scorecards and discard all other JSON
        ls_path_scorecard = [s for s in ls_file_json if "aggregate" in s]
        logging.info(f"Scorecards {len(ls_path_scorecard)} | {ls_path_scorecard}")
        lst_scorecard : List[St_score_card] = list()
        logging.debug(f"KEYS {d_model_size.keys()}")
        #now load the content of the scorecards
        for s_path_scorecard in ls_path_scorecard:
            st_score_card : St_score_card = St_score_card()
            st_score_card.load_json(s_path_scorecard)
            # @bugfix some models also append the quantization with an "@" after the model name
            st_score_card.st_stat_model.s_name = st_score_card.st_stat_model.s_name.replace("@", "-")
            logging.info(f"look for: {st_score_card.st_stat_model.s_name.lower()} | {[s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_name.lower() in s_model_name]}")
            logging.info(f"look for: {st_score_card.st_stat_model.s_quantization.lower()} | {[s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_quantization.lower() in s_model_name]}")
            #an index key must contain BOTH the model name and the quantization
            ls_model = [s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_name.lower() in s_model_name and st_score_card.st_stat_model.s_quantization.lower() in s_model_name]
            if len(ls_model) > 0:
                n_size_gb = d_model_size[ls_model[0]]
                st_score_card.st_stat_model.n_size_on_disk_gb = n_size_gb
                logging.info(f"Found size of model in GB: {n_size_gb}")
            else:
                logging.info(f"ERR: model size of {s_path_scorecard} in GB not found.")
            logging.debug(f"loaded: {st_score_card}")
            lst_scorecard.append(st_score_card)
        self.gst_scorecard = lst_scorecard
        logging.info(f"loaded {len(lst_scorecard)} scorecards")
        return False #OK

    def generate_table(self) -> bool:
        """
        Tables (stub, not implemented yet)
        """
        class St_row:
            def __init__(self):
                self.s_machine_hostname : str = ""
                return
        return False #OK

    def add_sma_match_score_accuracy_to_legend(self, i_lst_legend : List[plt.Line2D]) -> bool:
        """
        Append the Structure/Match/Accuracy color-key entries to a legend.
        Mutates i_lst_legend in place. Returns: False on success.
        """
        st_legend = plt.Line2D([], [], color='green', linestyle='', label='STRUCTURE SCORE (understand tags)')
        i_lst_legend.append(st_legend)
        st_legend = plt.Line2D([], [], color='orange', linestyle='', label='MATCH SCORE (answer to every question)')
        i_lst_legend.append(st_legend)
        st_legend = plt.Line2D([], [], color='cyan', linestyle='', label='ACCURACY SCORE (answer correctly)')
        i_lst_legend.append(st_legend)
        return False #OK

    def add_qa_question_answer_to_legend(self, i_lst_legend : List[plt.Line2D]) -> bool:
        """
        Append the Question/Answer speed color-key entries to a legend.
        Mutates i_lst_legend in place. Returns: False on success.
        """
        st_legend = plt.Line2D([], [], color='green', linestyle='', label='QUESTION SPEED (time to first token)')
        i_lst_legend.append(st_legend)
        st_legend = plt.Line2D([], [], color='pink', linestyle='', label='ANSWER SPEED (token generation speed)')
        i_lst_legend.append(st_legend)
        return False #OK

    def generate_plot_legend(self) -> Dict:
        """
        From the scorecards, build a single dictionary mapping every model
        name to a numeric label ("1", "2", ...) and every runtime name to a
        letter label ("A", "B", ...). The short labels are drawn on the
        charts, while the legend spells out the full names.
        """
        d_legend : Dict = dict()
        n_cnt_model : int = 0
        n_cnt_runtime : int = 0
        for st_scorecard in self.gst_scorecard:
            #models get incrementing numbers
            s_model_name = st_scorecard.get_model_name()
            if s_model_name not in d_legend:
                n_cnt_model += 1
                d_legend[s_model_name] = f"{n_cnt_model}"
                logging.info(f"New model {s_model_name} | {n_cnt_model}")
            #runtimes get incrementing letters
            s_runtime_name = st_scorecard.get_runtime_name()
            if s_runtime_name not in d_legend:
                n_cnt_runtime += 1
                d_legend[s_runtime_name] = f"{chr(ord('A')+n_cnt_runtime-1)}"
                logging.info(f"New runtime {s_runtime_name} | {n_cnt_runtime}")
        return d_legend

    #--------------------------------------------------------------------------------------------
    #   Private helpers (deduplicate the chart builders)
    #--------------------------------------------------------------------------------------------

    def _get_labels(self, st_scorecard, i_d_legend : Dict):
        """
        Resolve the short legend labels for a scorecard's model and runtime.
        Returns: (model_label, runtime_label); "ERR" when a name is not in the legend.
        """
        s_model_label = i_d_legend.get(st_scorecard.get_model_name(), "ERR")
        s_runtime_label = i_d_legend.get(st_scorecard.get_runtime_name(), "ERR")
        return s_model_label, s_runtime_label

    def _make_row(self, n_x : float, n_y_avg : float, n_y_std : float, s_label : str, s_color : str, n_x_std : float = 0.0) -> "St_chart_row":
        """
        Build a single chart row (bar/dot) and log it at debug level.
        """
        st_row : St_chart_row = St_chart_row()
        st_row.n_axis_x = n_x
        st_row.n_axis_x_std = n_x_std
        st_row.n_axis_y_avg = n_y_avg
        st_row.n_axis_y_std = n_y_std
        st_row.s_label = s_label
        st_row.s_color = s_color
        logging.debug(f"{st_row}")
        return st_row

    def _build_legend_handles(self, i_d_legend : Dict, i_s_legend_type : str) -> List:
        """
        Convert the legend dictionary into matplotlib Line2D handles,
        sorted by short label, optionally appending the "sma" or "qa" color keys.
        """
        lst_legend : List[plt.Line2D] = list()
        # Scan the legend and sort by value
        for s_key, s_value in sorted(i_d_legend.items(), key=lambda item: item[1]):
            st_legend = plt.Line2D([], [], linestyle='', label=f'{s_value}: {s_key}')
            lst_legend.append(st_legend)
        #optional color-key entries
        if i_s_legend_type == "sma":
            self.add_sma_match_score_accuracy_to_legend( lst_legend )
        elif i_s_legend_type == "qa":
            self.add_qa_question_answer_to_legend( lst_legend )
        return lst_legend

    def _draw_legend_and_save(self, i_s_name_chart : str, i_lst_legend : List) -> None:
        """
        Draw the legend outside the plot area, enable the grid, save the
        current figure as "output_chart/<name>.png" and close it.
        """
        plt.legend(
            handles=i_lst_legend,
            labels=[legend.get_label() for legend in i_lst_legend],
            labelcolor = [legend.get_color() for legend in i_lst_legend],
            handler_map={},
            bbox_to_anchor=(1.2, 0.75),
            loc='lower center'
        )
        plt.grid(True)
        # Save the plot as a PNG file
        s_path = path.join( "output_chart", f"{i_s_name_chart}.png" )
        plt.savefig(s_path, bbox_inches='tight')
        plt.close()

    #--------------------------------------------------------------------------------------------
    #   Generic Plot Generators
    #--------------------------------------------------------------------------------------------

    def generate_plot_log(self, i_s_name_chart : str, i_s_name_axis_x : str, i_s_name_axis_y : str, i_lst_data : List["St_chart_row"], i_d_legend : Dict, i_s_legend_type : str ) -> bool:
        """
        Generic bar chart on a base-2 logarithmic Y axis.
        Rows with a tiny Y spread are drawn as dots; the others as bars whose
        height is the Y standard deviation and width the X standard deviation.
        Returns: False on success.
        """
        # Create plot
        plt.figure(figsize=(12, 8))
        plt.title(f"{i_s_name_axis_x} (x) VS {i_s_name_axis_y} (Y)")
        plt.xlabel(i_s_name_axis_x)
        plt.ylabel(i_s_name_axis_y)
        #Logarithmic chart
        plt.yscale('log')
        yticker = ticker.LogLocator(base=2, numticks=20)
        plt.gca().yaxis.set_major_locator(yticker)
        plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
        for st_row in i_lst_data:
            #if the bar is too small, draw a dot instead
            if st_row.n_axis_y_std < abs(st_row.n_axis_y_avg)/1000.0:
                plt.plot(st_row.n_axis_x, st_row.n_axis_y_avg, "o", color=st_row.s_color )
            #draw the bar where height represents the standard deviation
            else:
                n_bar_height = st_row.n_axis_y_std
                n_bar_bottom = st_row.n_axis_y_avg -st_row.n_axis_y_std/2
                #use std information to change width, clamped to a visible minimum
                x_std = max(st_row.n_axis_x_std, 0.05)
                plt.bar(st_row.n_axis_x, n_bar_height, width=x_std, bottom=n_bar_bottom, color=st_row.s_color, alpha = 0.5)
            plt.text(st_row.n_axis_x, st_row.n_axis_y_avg, st_row.s_label, fontsize=12, ha='center')
        lst_legend = self._build_legend_handles( i_d_legend, i_s_legend_type )
        self._draw_legend_and_save( i_s_name_chart, lst_legend )
        return False #OK

    def generate_plot_probability(self, i_s_name_chart : str, i_s_name_axis_x : str, i_s_name_axis_y : str, i_lst_data : List["St_chart_row"], i_d_legend : Dict, i_s_legend_type : str ) -> bool:
        """
        Generic bar chart on a fixed linear Y axis from 0 to 1 (probabilities).
        Rows with a tiny Y spread are drawn as dots; the others as fixed-width
        bars whose height is the Y standard deviation.
        Returns: False on success.
        """
        # Create plot
        plt.figure(figsize=(12, 8))
        plt.title(f"{i_s_name_axis_x} (x) VS {i_s_name_axis_y} (Y)")
        plt.xlabel(i_s_name_axis_x)
        plt.ylabel(i_s_name_axis_y)
        # Set the Y-axis range from 0 to 1
        plt.ylim(0, 1)
        plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.3f}'))
        for st_row in i_lst_data:
            #if the bar is too small, draw a dot instead
            if st_row.n_axis_y_std < abs(st_row.n_axis_y_avg)/1000.0:
                plt.plot(st_row.n_axis_x, st_row.n_axis_y_avg, "o", color=st_row.s_color )
            #draw the bar where height represents the standard deviation
            else:
                n_bar_height = st_row.n_axis_y_std
                n_bar_bottom = st_row.n_axis_y_avg -st_row.n_axis_y_std/2
                plt.bar(st_row.n_axis_x, n_bar_height, width=0.1, bottom=n_bar_bottom, color=st_row.s_color)
            plt.text(st_row.n_axis_x, st_row.n_axis_y_avg, st_row.s_label, fontsize=12, ha='center')
        lst_legend = self._build_legend_handles( i_d_legend, i_s_legend_type )
        self._draw_legend_and_save( i_s_name_chart, lst_legend )
        return False #OK

    #--------------------------------------------------------------------------------------------
    #   Specific Charts XY
    #--------------------------------------------------------------------------------------------

    def generate_x_size_y_speed_tps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: model size on disk [GB] (X) against token speed [T/s] (Y).
        Question speed (time to first token) is green, answer speed is pink.
        Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            s_label = f"{s_model_label}.{s_runtime_label}"
            n_size_gb = st_scorecard.st_stat_model.n_size_on_disk_gb
            #green: question token speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_question_token_per_second.n_avg, st_scorecard.n_speed_question_token_per_second.n_std, s_label, "green" ))
            #pink: answer token speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_answer_token_per_second.n_avg, st_scorecard.n_speed_answer_token_per_second.n_std, s_label, "pink" ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_size_vs_speed_tps", "Model Size [GB]", "Speed [T/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_size_y_speed_cps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: model size on disk [GB] (X) against character speed [C/s] (Y).
        Question speed (time to first token) is green, answer speed is pink.
        Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            s_label = f"{s_model_label}.{s_runtime_label}"
            n_size_gb = st_scorecard.st_stat_model.n_size_on_disk_gb
            #green: question character speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_question_char_per_second.n_avg, st_scorecard.n_speed_question_char_per_second.n_std, s_label, "green" ))
            #pink: answer character speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_answer_char_per_second.n_avg, st_scorecard.n_speed_answer_char_per_second.n_std, s_label, "pink" ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_size_vs_speed_cps", "Model Size [GB]", "Speed [C/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_size_y_accuracy(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: model size on disk [GB] (X) against the three accuracies (Y):
        structure (orange), match (green) and accuracy (cyan).
        Linear Y scale fixed from 0 to 1. Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            s_label = f"{s_model_label}.{s_runtime_label}"
            n_size_gb = st_scorecard.st_stat_model.n_size_on_disk_gb
            #one row per score, color keyed via the "sma" legend
            lt_score_color = (
                (st_scorecard.n_score_structure, "orange"),
                (st_scorecard.n_score_match, "green"),
                (st_scorecard.n_score_accuracy, "cyan"),
            )
            for st_score, s_color in lt_score_color:
                lst_row.append(self._make_row( n_size_gb, st_score.n_avg, st_score.n_std, s_label, s_color ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_probability( "model_size_vs_accuracy", "Model Size [GB]", "Accuracy [%]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_accuracy_y_speed_tps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: accuracy (X) against token speed [T/s] (Y).
        For each of the three scores (structure orange, match green, accuracy
        cyan), two rows are emitted: question speed ("Qst") and answer speed
        ("Ans"). Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            lt_score_color = (
                (st_scorecard.n_score_structure, "orange"),
                (st_scorecard.n_score_match, "green"),
                (st_scorecard.n_score_accuracy, "cyan"),
            )
            for st_score, s_color in lt_score_color:
                #question speed (time to first token)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_question_token_per_second.n_avg, st_scorecard.n_speed_question_token_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nQst", s_color, n_x_std=st_score.n_std ))
                #answer speed (token generation)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_answer_token_per_second.n_avg, st_scorecard.n_speed_answer_token_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nAns", s_color, n_x_std=st_score.n_std ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_accuracy_vs_speed_tps", "Accuracy [%]", "Speed [T/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_accuracy_y_speed_cps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: accuracy (X) against character speed [C/s] (Y).
        For each of the three scores (structure orange, match green, accuracy
        cyan), two rows are emitted: question speed ("Qst") and answer speed
        ("Ans"). Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            lt_score_color = (
                (st_scorecard.n_score_structure, "orange"),
                (st_scorecard.n_score_match, "green"),
                (st_scorecard.n_score_accuracy, "cyan"),
            )
            for st_score, s_color in lt_score_color:
                #question speed (time to first token)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_question_char_per_second.n_avg, st_scorecard.n_speed_question_char_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nQst", s_color, n_x_std=st_score.n_std ))
                #answer speed (character generation)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_answer_char_per_second.n_avg, st_scorecard.n_speed_answer_char_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nAns", s_color, n_x_std=st_score.n_std ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_accuracy_vs_speed_cps", "Accuracy [%]", "Speed [C/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail
#if executed directly, run the test
if __name__ == "__main__":
    # Setup logging; "logging" is already imported at the top of the module,
    # so the redundant in-guard re-imports were removed.
    logging.basicConfig(
        filename="debug.log",
        #level=logging.DEBUG,
        level=logging.INFO,
        format='[%(asctime)s] %(levelname)s %(module)s:%(lineno)d > %(message)s',
        filemode='w'
    )
    logging.info("BEGIN")
    cl_generator = Cl_generate_chart()
    cl_generator.load_scorecard()
    d_legend = cl_generator.generate_plot_legend()
    logging.info(f"LEGEND: {d_legend}")
    #Speed VS Model size charts
    cl_generator.generate_x_size_y_speed_cps( d_legend, "qa" )
    cl_generator.generate_x_size_y_speed_tps( d_legend, "" )
    cl_generator.generate_x_size_y_accuracy( d_legend, "sma" )
    #Accuracy vs speed chart
    cl_generator.generate_x_accuracy_y_speed_tps( d_legend, "sma" )
    cl_generator.generate_x_accuracy_y_speed_cps( d_legend, "sma" )
    logging.info("END")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment