Skip to content

Instantly share code, notes, and snippets.

@OrsoEric
Created August 19, 2025 06:04
Show Gist options
  • Save OrsoEric/dd060bcdce853ca3db266203138d6cea to your computer and use it in GitHub Desktop.
Save OrsoEric/dd060bcdce853ca3db266203138d6cea to your computer and use it in GitHub Desktop.
generate_benchmark_chart.py
"""
Take all the scorecards in the chart folder
Generate various charts and tables comparing models against each other
this script works on the folder "output_chart"
The user manually puts any number of aggregate JSON scorecards generated by "execute_benchmark" on a range of models, machines, benchmarks and runtimes
This script takes care of generating useful tables and charts from that data
CHARTS
show the model accuracy against the size on disk
Y accuracy
X size GB
Y speed T/s
X size GB
Y speed C/s
X size GB
show the model speed against the accuracy
Y accuracy
X speed C/s
Y accuracy
X speed T/s
Performance with context against accuracy, it's a polygon
Y accuracy structure, match, accuracy
X context T
Y accuracy structure, match, accuracy
X context C
TABLE
model
compression
QUANT
I want to see the performance across quantization
"""
import logging
import json
from os import path, listdir
from typing import List, Dict
import matplotlib.pyplot as plt
from matplotlib import ticker
#benchmark scorecard structure, the same used by the benchmark executon script
from st_score_card import St_score_card
def load_model_sizes(json_file_path):
with open(json_file_path, 'r', encoding='utf-8') as file:
data = json.load(file)
d_model_sizes = {
path.splitext(entry["s_filename"])[0].lower(): entry["n_size_gb"]
for entry in data
}
return d_model_sizes
def get_files_in_directory(i_s_folder_path: str, s_extension = ".json") -> List[str]:
"""
This function takes a directory path as input and returns a list containing
paths of all JSON files within the directory.
Parameters:
i_folder_path (str): The path to the folder where the search is conducted.
Returns:
List[str]: A list of strings, each representing the full path of a .json file in the specified folder.
"""
ln_json_files = []
for st_file_name in listdir(i_s_folder_path):
if st_file_name.endswith(s_extension):
st_full_file_path = path.join(i_s_folder_path, st_file_name)
ln_json_files.append(st_full_file_path)
return ln_json_files
class St_chart_row:
"""
defines a row of in a chart
"""
def __init__(self):
#X position
self.n_axis_x : float = 0.0
self.n_axis_x_std : float = 0.0
#Y bar height
self.n_axis_y_avg : float = 0.0
self.n_axis_y_std : float = 0.0
#decorators
self.s_color : str = ""
self.s_label : str = ""
pass
def __repr__(self):
return f"X: {self.n_axis_x} | Yavg: {self.n_axis_y_avg} | Ystd: {self.n_axis_y_std} | Color: {self.s_color}"
class Cl_generate_chart():
def __init__(self):
self.gst_scorecard : List[St_score_card] = list()
return
def load_scorecard(self) -> bool:
# LOAD all JSON
s_path_json = path.join( "output_chart" )
ls_file_json = get_files_in_directory( s_path_json )
logging.debug(f"JSON files {len(ls_file_json)}: {ls_file_json}")
s_path_model_size = path.join( "output_chart", "model_size.json" )
#If I have a json listing all the model sizes
if s_path_model_size in ls_file_json:
# LOAD model size
d_model_size = load_model_sizes(s_path_model_size)
logging.info(f"Loaded size from {len(d_model_size)} models: {d_model_size}")
#It's not necessary, I'll later skip some very useful metrics
else:
d_model_size = dict()
#load only the scorecards and discard all other JSON
ls_path_scorecard = [s for s in ls_file_json if "aggregate" in s]
logging.info(f"Scorecards {len(ls_path_scorecard)} | {ls_path_scorecard}")
lst_scorecard : List[St_score_card] = list()
logging.debug(f"KEYS {d_model_size.keys()}")
#now load the content of the scorecards
for s_path_scorecard in ls_path_scorecard:
st_score_card : St_score_card = St_score_card()
st_score_card.load_json(s_path_scorecard)
# @bugfix some models also append the quantization with at after the model name
st_score_card.st_stat_model.s_name = st_score_card.st_stat_model.s_name.replace("@", "-")
logging.info(f"look for: {st_score_card.st_stat_model.s_name.lower()} | {[s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_name.lower() in s_model_name]}")
logging.info(f"look for: {st_score_card.st_stat_model.s_quantization.lower()} | {[s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_quantization.lower() in s_model_name]}")
ls_model = [s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_name.lower() in s_model_name and st_score_card.st_stat_model.s_quantization.lower() in s_model_name]
if len(ls_model)>0:
n_size_gb = d_model_size[ls_model[0]]
st_score_card.st_stat_model.n_size_on_disk_gb = n_size_gb
logging.info(f"Found size of model in GB: {n_size_gb}")
else:
logging.info(f"ERR: model size of {s_path_scorecard} in GB not found.")
logging.debug(f"laoded: {st_score_card}")
lst_scorecard.append(st_score_card)
self.gst_scorecard = lst_scorecard
logging.info(f"laoded {len(lst_scorecard)} scorecards")
return False #OK
def generate_table(self) -> bool:
"""
Tables
"""
class St_row:
def __init__(self):
self.s_machine_hostname : str = ""
return
return False #OK
def add_sma_match_score_accuracy_to_legend(self, i_lst_legend : List[plt.Line2D]) -> bool:
# Add "MATCH SCORE" entry with green color
st_legend = plt.Line2D([], [], color='green', linestyle='', label='STRUCTURE SCORE (understand tags)')
i_lst_legend.append(st_legend)
st_legend = plt.Line2D([], [], color='orange', linestyle='', label='MATCH SCORE (answer to every question)')
i_lst_legend.append(st_legend)
st_legend = plt.Line2D([], [], color='cyan', linestyle='', label='ACCURACY SCORE (answer correctly)')
i_lst_legend.append(st_legend)
return False #OK
def add_qa_question_answer_to_legend(self, i_lst_legend : List[plt.Line2D]) -> bool:
# Add "MATCH SCORE" entry with green color
st_legend = plt.Line2D([], [], color='green', linestyle='', label='QUESTION SPEED (time to first token)')
i_lst_legend.append(st_legend)
st_legend = plt.Line2D([], [], color='pink', linestyle='', label='ANSWER SPEED (token generation speed)')
i_lst_legend.append(st_legend)
return False #OK
def generate_plot_legend(self) -> Dict:
"""
From the scorecards I extract all the models and all the runtimes
I extract two dictionaries associating a key with a model name and a runtime name
models use numbers
runtimes use letters
"""
d_legend : Dict = dict()
n_cnt_model : int = 0
n_cnt_runtime : int = 0
#chr(ord('A')+n_index
for n_index, st_scorecard in enumerate(self.gst_scorecard):
#extract model name
s_model_name = st_scorecard.get_model_name()
#if model name is not already saved
if s_model_name not in d_legend:
n_cnt_model += 1
d_legend[s_model_name] = f"{n_cnt_model}"
logging.info(f"New model {s_model_name} | {n_cnt_model}")
#if model is already saved
else:
#do nothing
pass
#extract runtime name
s_runtime_name = st_scorecard.get_runtime_name()
#if runtime name is not already saved
if s_runtime_name not in d_legend:
n_cnt_runtime += 1
d_legend[s_runtime_name] = f"{chr(ord('A')+n_cnt_runtime-1)}"
logging.info(f"New runtime {s_runtime_name} | {n_cnt_runtime}")
#if model is already saved
else:
#do nothing
pass
return d_legend
#--------------------------------------------------------------------------------------------
# Generic Plot Generators
#--------------------------------------------------------------------------------------------
def generate_plot_log(self, i_s_name_chart : str, i_s_name_axis_x : str, i_s_name_axis_y : str, i_lst_data : List["St_chart_row"], i_d_legend : Dict, i_s_legend_type : str ) -> bool:
"""
This function encapsulates the chart generation circuitry
so that specialized chart generation function have less redundant
work to do to make charts
It's designed to do bar charts on logarithmic Y axis
"""
# Create plot
plt.figure(figsize=(12, 8))
plt.title(f"{i_s_name_axis_x} (x) VS {i_s_name_axis_y} (Y)")
plt.xlabel(i_s_name_axis_x)
plt.ylabel(i_s_name_axis_y)
#Logarithmic chart
plt.yscale('log')
yticker = ticker.LogLocator(base=2, numticks=20)
plt.gca().yaxis.set_major_locator(yticker)
plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
for st_row in i_lst_data:
#if the bar is too small, draw a dot instead
if st_row.n_axis_y_std < abs(st_row.n_axis_y_avg)/1000.0:
plt.plot(st_row.n_axis_x, st_row.n_axis_y_avg, "o", color=st_row.s_color )
#draw the bar where height represent the standard deviation
else:
n_bar_height = st_row.n_axis_y_std
n_bar_bottom = st_row.n_axis_y_avg -st_row.n_axis_y_std/2
#use std information to change width
if st_row.n_axis_x_std < 0.05:
x_std = 0.05
else:
x_std = st_row.n_axis_x_std
plt.bar(st_row.n_axis_x, n_bar_height, width=x_std, bottom=n_bar_bottom, color=st_row.s_color, alpha = 0.5)
plt.text(st_row.n_axis_x, st_row.n_axis_y_avg, st_row.s_label, fontsize=12, ha='center')
#Convert legend from dictionary
lst_legend : List[plt.Line2D] = list()
# Scan the legend and sort by value
for s_key, s_value in sorted(i_d_legend.items(), key=lambda item: item[1]):
st_legend = plt.Line2D([], [], linestyle='', label=f'{s_value}: {s_key}')
lst_legend.append(st_legend)
#if I want to add Structure, Match, Accuracy
if i_s_legend_type == "sma":
self.add_sma_match_score_accuracy_to_legend( lst_legend )
elif i_s_legend_type == "qa":
self.add_qa_question_answer_to_legend( lst_legend )
#draw the legends
plt.legend(
handles=lst_legend,
labels=[legend.get_label() for legend in lst_legend],
labelcolor = [legend.get_color() for legend in lst_legend],
handler_map={},
#labelcolor='green', # Set text color to green globally (or customize per entry)
bbox_to_anchor=(1.2, 0.75),
loc='lower center'
)
plt.grid(True)
# Save the plot as a PNG file
s_path = path.join( "output_chart", f"{i_s_name_chart}.png" )
plt.savefig(s_path, bbox_inches='tight')
plt.close()
return False #OK
def generate_plot_probability(self, i_s_name_chart : str, i_s_name_axis_x : str, i_s_name_axis_y : str, i_lst_data : List["St_chart_row"], i_d_legend : Dict, i_s_legend_type : str ) -> bool:
"""
This function encapsulates the chart generation circuitry
so that specialized chart generation function have less redundant
work to do to make charts
It's designed to show probabilities between zero and one
"""
# Create plot
plt.figure(figsize=(12, 8))
plt.title(f"{i_s_name_axis_x} (x) VS {i_s_name_axis_y} (Y)")
plt.xlabel(i_s_name_axis_x)
plt.ylabel(i_s_name_axis_y)
# Set the Y-axis range from 0 to 1
plt.ylim(0, 1)
# Format Y-axis labels as percentages
plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.3f}'))
for st_row in i_lst_data:
#if the bar is too small, draw a dot instead
if st_row.n_axis_y_std < abs(st_row.n_axis_y_avg)/1000.0:
plt.plot(st_row.n_axis_x, st_row.n_axis_y_avg, "o", color=st_row.s_color )
#draw the bar where height represent the standard deviation
else:
n_bar_height = st_row.n_axis_y_std
n_bar_bottom = st_row.n_axis_y_avg -st_row.n_axis_y_std/2
plt.bar(st_row.n_axis_x, n_bar_height, width=0.1, bottom=n_bar_bottom, color=st_row.s_color)
plt.text(st_row.n_axis_x, st_row.n_axis_y_avg, st_row.s_label, fontsize=12, ha='center')
#Convert legend from dictionary
lst_legend : List[plt.Line2D] = list()
# Scan the legend and sort by value
for s_key, s_value in sorted(i_d_legend.items(), key=lambda item: item[1]):
st_legend = plt.Line2D([], [], linestyle='', label=f'{s_value}: {s_key}')
lst_legend.append(st_legend)
#if I want to add Structure, Match, Accuracy
if i_s_legend_type == "sma":
self.add_sma_match_score_accuracy_to_legend( lst_legend )
elif i_s_legend_type == "qa":
self.add_qa_question_answer_to_legend( lst_legend )
#draw the legends
#plt.legend(handles=lst_legend, bbox_to_anchor=(1.2, 0.75), loc='lower center')
plt.legend(
handles=lst_legend,
labels=[legend.get_label() for legend in lst_legend],
labelcolor = [legend.get_color() for legend in lst_legend],
handler_map={},
#labelcolor='green', # Set text color to green globally (or customize per entry)
bbox_to_anchor=(1.2, 0.75),
loc='lower center'
)
plt.grid(True)
# Save the plot as a PNG file
s_path = path.join( "output_chart", f"{i_s_name_chart}.png" )
plt.savefig(s_path, bbox_inches='tight')
plt.close()
return False #OK
#--------------------------------------------------------------------------------------------
# Specific Charts XY
#--------------------------------------------------------------------------------------------
def generate_x_size_y_speed_tps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
"""
I can receive the legend as higher hierarchy. after all all my chart should list model and runtime?
get in input a legend of models and runtimes used to get the labels
"""
#row data of the bars
lst_row : List["St_chart_row"] = list()
#legend
lst_legend : List[plt.Line2D] = list()
for n_index, st_scorecard in enumerate(self.gst_scorecard):
#LABEL that is linked with the legend
#Compute model label
s_model_name = st_scorecard.get_model_name()
if s_model_name in i_d_legend:
s_model_label = i_d_legend[s_model_name]
else:
s_model_label = "ERR"
#Compute model label
s_runtime_name = st_scorecard.get_runtime_name()
if s_runtime_name in i_d_legend:
s_runtime_label = i_d_legend[s_runtime_name]
else:
s_runtime_label = "ERR"
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, answer per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
st_row.s_color = "pink"
logging.debug(f"{st_row}")
lst_row.append(st_row)
#Ask the chart generator to do its thing
x_fail = self.generate_plot_log( "model_size_vs_speed_tps", "Model Size [GB]", "Speed [T/s]", lst_row, i_d_legend, i_s_legend_type )
return x_fail
def generate_x_size_y_speed_cps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
"""
I can receive the legend as higher hierarchy. after all all my chart should list model and runtime?
get in input a legend of models and runtimes used to get the labels
"""
#row data of the bars
lst_row : List["St_chart_row"] = list()
#legend
lst_legend : List[plt.Line2D] = list()
for n_index, st_scorecard in enumerate(self.gst_scorecard):
#LABEL that is linked with the legend
#Compute model label
s_model_name = st_scorecard.get_model_name()
if s_model_name in i_d_legend:
s_model_label = i_d_legend[s_model_name]
else:
s_model_label = "ERR"
#Compute model label
s_runtime_name = st_scorecard.get_runtime_name()
if s_runtime_name in i_d_legend:
s_runtime_label = i_d_legend[s_runtime_name]
else:
s_runtime_label = "ERR"
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, answer per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
st_row.s_color = "pink"
logging.debug(f"{st_row}")
lst_row.append(st_row)
#Ask the chart generator to do its thing
x_fail = self.generate_plot_log( "model_size_vs_speed_cps", "Model Size [GB]", "Speed [C/s]", lst_row, i_d_legend, i_s_legend_type )
return x_fail
def generate_x_size_y_accuracy(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
"""
Here I plot the three accuracies
-Structure
-Matching
-Accuracy
against the model size
It's a linear Y scale, fixed 0 to 1
"""
#row data of the bars
lst_row : List["St_chart_row"] = list()
#legend
lst_legend : List[plt.Line2D] = list()
for n_index, st_scorecard in enumerate(self.gst_scorecard):
#LABEL that is linked with the legend
#Compute model label
s_model_name = st_scorecard.get_model_name()
if s_model_name in i_d_legend:
s_model_label = i_d_legend[s_model_name]
else:
s_model_label = "ERR"
#Compute model label
s_runtime_name = st_scorecard.get_runtime_name()
if s_runtime_name in i_d_legend:
s_runtime_label = i_d_legend[s_runtime_name]
else:
s_runtime_label = "ERR"
# STRUCTURE SCORE
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_score_structure.n_avg
st_row.n_axis_y_std = st_scorecard.n_score_structure.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
#chart color
st_row.s_color = "orange"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# MATCH SCORE
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_score_match.n_avg
st_row.n_axis_y_std = st_scorecard.n_score_match.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# MATCH SCORE
st_row : St_chart_row= St_chart_row()
#X axis model size GB
st_row.n_axis_x = st_scorecard.st_stat_model.n_size_on_disk_gb
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_score_accuracy.n_avg
st_row.n_axis_y_std = st_scorecard.n_score_accuracy.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}"
#chart color
st_row.s_color = "cyan"
logging.debug(f"{st_row}")
lst_row.append(st_row)
#Ask the chart generator to do its thing
x_fail = self.generate_plot_probability( "model_size_vs_accuracy", "Model Size [GB]", "Accuracy [%]", lst_row, i_d_legend, i_s_legend_type )
return x_fail
def generate_x_accuracy_y_speed_tps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
"""
TODO
I can receive the legend as higher hierarchy. after all all my chart should list model and runtime?
get in input a legend of models and runtimes used to get the labels
"""
#row data of the bars
lst_row : List["St_chart_row"] = list()
#legend
lst_legend : List[plt.Line2D] = list()
for n_index, st_scorecard in enumerate(self.gst_scorecard):
#LABEL that is linked with the legend
#Compute model label
s_model_name = st_scorecard.get_model_name()
if s_model_name in i_d_legend:
s_model_label = i_d_legend[s_model_name]
else:
s_model_label = "ERR"
#Compute model label
s_runtime_name = st_scorecard.get_runtime_name()
if s_runtime_name in i_d_legend:
s_runtime_label = i_d_legend[s_runtime_name]
else:
s_runtime_label = "ERR"
# QUESTION - STRUCTURE
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_structure.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_structure.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nQst"
#chart color
st_row.s_color = "orange"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# ANSWER - STRUCTURE
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_structure.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_structure.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nAns"
#chart color
st_row.s_color = "orange"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# QUESTION - MATCH
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_match.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_match.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nQst"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# ANSWER - MATCH
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_match.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_match.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nAns"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# QUESTION - ACCURACY
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_accuracy.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_accuracy.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nQst"
#chart color
st_row.s_color = "cyan"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# ANSWER - ACCURACY
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_accuracy.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_accuracy.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_token_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_token_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nAns"
#chart color
st_row.s_color = "cyan"
logging.debug(f"{st_row}")
lst_row.append(st_row)
#Ask the chart generator to do its thing
x_fail = self.generate_plot_log( "model_accuracy_vs_speed_tps", "Accuracy [%]", "Speed [T/s]", lst_row, i_d_legend, i_s_legend_type )
return x_fail
def generate_x_accuracy_y_speed_cps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
"""
TODO
I can receive the legend as higher hierarchy. after all all my chart should list model and runtime?
get in input a legend of models and runtimes used to get the labels
"""
#row data of the bars
lst_row : List["St_chart_row"] = list()
#legend
lst_legend : List[plt.Line2D] = list()
for n_index, st_scorecard in enumerate(self.gst_scorecard):
#LABEL that is linked with the legend
#Compute model label
s_model_name = st_scorecard.get_model_name()
if s_model_name in i_d_legend:
s_model_label = i_d_legend[s_model_name]
else:
s_model_label = "ERR"
#Compute model label
s_runtime_name = st_scorecard.get_runtime_name()
if s_runtime_name in i_d_legend:
s_runtime_label = i_d_legend[s_runtime_name]
else:
s_runtime_label = "ERR"
# QUESTION - STRUCTURE
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_structure.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_structure.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nQst"
#chart color
st_row.s_color = "orange"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# ANSWER - STRUCTURE
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_structure.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_structure.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nAns"
#chart color
st_row.s_color = "orange"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# QUESTION - MATCH
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_match.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_match.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nQst"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# ANSWER - MATCH
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_match.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_match.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nAns"
#chart color
st_row.s_color = "green"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# QUESTION - ACCURACY
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_accuracy.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_accuracy.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_question_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_question_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nQst"
#chart color
st_row.s_color = "cyan"
logging.debug(f"{st_row}")
lst_row.append(st_row)
# ANSWER - ACCURACY
#create new row data
st_row : St_chart_row = St_chart_row()
#X axis model structure accuracy
st_row.n_axis_x = st_scorecard.n_score_accuracy.n_avg
st_row.n_axis_x_std = st_scorecard.n_score_accuracy.n_std
#Y axis, question per second
st_row.n_axis_y_avg = st_scorecard.n_speed_answer_char_per_second.n_avg
st_row.n_axis_y_std = st_scorecard.n_speed_answer_char_per_second.n_std
#Label
st_row.s_label = f"{s_model_label}.{s_runtime_label}\nAns"
#chart color
st_row.s_color = "cyan"
logging.debug(f"{st_row}")
lst_row.append(st_row)
#Ask the chart generator to do its thing
x_fail = self.generate_plot_log( "model_accuracy_vs_speed_cps", "Accuracy [%]", "Speed [C/s]", lst_row, i_d_legend, i_s_legend_type )
return x_fail
#if executed directly, run the test
if __name__ == "__main__":
import logging
from typing import List
# Setup logging
logging.basicConfig(
filename="debug.log",
#level=logging.DEBUG,
level=logging.INFO,
format='[%(asctime)s] %(levelname)s %(module)s:%(lineno)d > %(message)s',
filemode='w'
)
logging.info("BEGIN")
cl_generator = Cl_generate_chart()
cl_generator.load_scorecard()
d_legend = cl_generator.generate_plot_legend()
logging.info(f"LEGEND: {d_legend}")
#Speed VS Model size charts
cl_generator.generate_x_size_y_speed_cps( d_legend, "qa" )
cl_generator.generate_x_size_y_speed_tps( d_legend, "" )
cl_generator.generate_x_size_y_accuracy( d_legend, "sma" )
#Accuracy vs speed chart
cl_generator.generate_x_accuracy_y_speed_tps( d_legend, "sma" )
cl_generator.generate_x_accuracy_y_speed_cps( d_legend, "sma" )
logging.info("END")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment