Created
August 19, 2025 06:04
-
-
Save OrsoEric/dd060bcdce853ca3db266203138d6cea to your computer and use it in GitHub Desktop.
generate_benchmark_chart.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Take all the scorecards in the chart folder | 
Generate various charts and tables comparing models against each other | |
this script works on the folder "output_chart" | |
The user manually puts any number of aggregate JSON score cards generated by "execute_benchmark" on a range of models, machines, benchmarks and runtimes | 
This script takes care of generating useful tables and charts from that data | |
CHARTS | |
show the model accuracy against the size on disk | |
Y accuracy | |
X size GB | |
Y speed T/s | |
X size GB | |
Y speed C/s | |
X size GB | |
show the model speed against the accuracy | |
Y accuracy | |
X speed C/s | |
Y accuracy | |
X speed T/s | |
Performance with context against accuracy, it's a polygon | |
Y accuracy structure, match, accuracy | |
X context T | |
Y accuracy structure, match, accuracy | |
X context C | |
TABLE | |
model | |
compression | |
QUANT | |
I want to see the performance across quantization | |
""" | |
import logging | |
import json | |
from os import path, listdir | |
from typing import List, Dict | |
import matplotlib.pyplot as plt | |
from matplotlib import ticker | |
#benchmark scorecard structure, the same used by the benchmark executon script | |
from st_score_card import St_score_card | |
def load_model_sizes(json_file_path):
    """
    Read the model-size index JSON and build a lookup dictionary.
    Each entry maps a model file name (extension stripped, lower case)
    to its size on disk in GB.
    Parameters:
        json_file_path (str): path of the JSON index file.
    Returns:
        Dict[str, float]: model name -> size in GB.
    """
    with open(json_file_path, 'r', encoding='utf-8') as file:
        l_entries = json.load(file)
    d_model_sizes = dict()
    for d_entry in l_entries:
        #strip the extension and normalize the case so later substring lookups work
        s_key = path.splitext(d_entry["s_filename"])[0].lower()
        d_model_sizes[s_key] = d_entry["n_size_gb"]
    return d_model_sizes
def get_files_in_directory(i_s_folder_path: str, s_extension : str = ".json") -> List[str]:
    """
    This function takes a directory path as input and returns a list containing
    paths of all files within the directory that end with the given extension.
    Parameters:
        i_s_folder_path (str): The path to the folder where the search is conducted.
        s_extension (str): File extension to match, including the dot (default ".json").
    Returns:
        List[str]: A list of strings, each representing the full path of a matching file in the specified folder.
    """
    ln_json_files = []
    for st_file_name in listdir(i_s_folder_path):
        #keep only the files with the requested extension
        if st_file_name.endswith(s_extension):
            st_full_file_path = path.join(i_s_folder_path, st_file_name)
            ln_json_files.append(st_full_file_path)
    return ln_json_files
class St_chart_row:
    """
    Defines one row (one bar or dot) in a chart.
    X is the bar position, Y is the bar center; the standard deviations
    control the bar width and height drawn by the plot generators.
    """
    def __init__(self):
        #X position and its spread (used as bar width)
        self.n_axis_x : float = 0.0
        self.n_axis_x_std : float = 0.0
        #Y bar center and its spread (used as bar height)
        self.n_axis_y_avg : float = 0.0
        self.n_axis_y_std : float = 0.0
        #decorators: matplotlib color name and short text label drawn on the chart
        self.s_color : str = ""
        self.s_label : str = ""

    def __repr__(self):
        return f"X: {self.n_axis_x} | Yavg: {self.n_axis_y_avg} | Ystd: {self.n_axis_y_std} | Color: {self.s_color}"
class Cl_generate_chart():
    """
    Loads aggregate benchmark scorecards from the "output_chart" folder and
    renders comparison charts (model size, speed, accuracy) as PNG files
    inside the same folder.
    All public methods return False on success (project convention).
    """
    def __init__(self):
        #scorecards loaded by load_scorecard()
        self.gst_scorecard : List[St_score_card] = list()
        return

    def load_scorecard(self) -> bool:
        """
        Load every aggregate scorecard JSON found in "output_chart".
        If an optional "model_size.json" index is present, attach the size on
        disk in GB to each scorecard whose model name and quantization both
        appear (as substrings) in one of the index keys.
        Returns: False on success.
        """
        # LOAD all JSON
        s_path_json = path.join( "output_chart" )
        ls_file_json = get_files_in_directory( s_path_json )
        logging.debug(f"JSON files {len(ls_file_json)}: {ls_file_json}")
        s_path_model_size = path.join( "output_chart", "model_size.json" )
        #If I have a json listing all the model sizes
        if s_path_model_size in ls_file_json:
            # LOAD model size
            d_model_size = load_model_sizes(s_path_model_size)
            logging.info(f"Loaded size from {len(d_model_size)} models: {d_model_size}")
        else:
            #sizes are optional; without them the size-based charts lack X data
            d_model_size = dict()
        #load only the scorecards and discard all other JSON
        ls_path_scorecard = [s for s in ls_file_json if "aggregate" in s]
        logging.info(f"Scorecards {len(ls_path_scorecard)} | {ls_path_scorecard}")
        lst_scorecard : List[St_score_card] = list()
        logging.debug(f"KEYS {d_model_size.keys()}")
        #now load the content of the scorecards
        for s_path_scorecard in ls_path_scorecard:
            st_score_card : St_score_card = St_score_card()
            st_score_card.load_json(s_path_scorecard)
            # @bugfix some models also append the quantization with an "@" after the model name
            st_score_card.st_stat_model.s_name = st_score_card.st_stat_model.s_name.replace("@", "-")
            logging.info(f"look for: {st_score_card.st_stat_model.s_name.lower()} | {[s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_name.lower() in s_model_name]}")
            logging.info(f"look for: {st_score_card.st_stat_model.s_quantization.lower()} | {[s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_quantization.lower() in s_model_name]}")
            #an index key must contain BOTH the model name and the quantization
            ls_model = [s_model_name for s_model_name in d_model_size if st_score_card.st_stat_model.s_name.lower() in s_model_name and st_score_card.st_stat_model.s_quantization.lower() in s_model_name]
            if len(ls_model) > 0:
                n_size_gb = d_model_size[ls_model[0]]
                st_score_card.st_stat_model.n_size_on_disk_gb = n_size_gb
                logging.info(f"Found size of model in GB: {n_size_gb}")
            else:
                logging.info(f"ERR: model size of {s_path_scorecard} in GB not found.")
            logging.debug(f"loaded: {st_score_card}")
            lst_scorecard.append(st_score_card)
        self.gst_scorecard = lst_scorecard
        logging.info(f"loaded {len(lst_scorecard)} scorecards")
        return False #OK

    def generate_table(self) -> bool:
        """
        Tables (stub, not implemented yet)
        """
        class St_row:
            def __init__(self):
                self.s_machine_hostname : str = ""
                return
        return False #OK

    def add_sma_match_score_accuracy_to_legend(self, i_lst_legend : List[plt.Line2D]) -> bool:
        """
        Append the Structure/Match/Accuracy color-key entries to a legend.
        Mutates i_lst_legend in place. Returns: False on success.
        """
        st_legend = plt.Line2D([], [], color='green', linestyle='', label='STRUCTURE SCORE (understand tags)')
        i_lst_legend.append(st_legend)
        st_legend = plt.Line2D([], [], color='orange', linestyle='', label='MATCH SCORE (answer to every question)')
        i_lst_legend.append(st_legend)
        st_legend = plt.Line2D([], [], color='cyan', linestyle='', label='ACCURACY SCORE (answer correctly)')
        i_lst_legend.append(st_legend)
        return False #OK

    def add_qa_question_answer_to_legend(self, i_lst_legend : List[plt.Line2D]) -> bool:
        """
        Append the Question/Answer speed color-key entries to a legend.
        Mutates i_lst_legend in place. Returns: False on success.
        """
        st_legend = plt.Line2D([], [], color='green', linestyle='', label='QUESTION SPEED (time to first token)')
        i_lst_legend.append(st_legend)
        st_legend = plt.Line2D([], [], color='pink', linestyle='', label='ANSWER SPEED (token generation speed)')
        i_lst_legend.append(st_legend)
        return False #OK

    def generate_plot_legend(self) -> Dict:
        """
        From the scorecards, build a single dictionary mapping every model
        name to a numeric label ("1", "2", ...) and every runtime name to a
        letter label ("A", "B", ...). The short labels are drawn on the
        charts, while the legend spells out the full names.
        """
        d_legend : Dict = dict()
        n_cnt_model : int = 0
        n_cnt_runtime : int = 0
        for st_scorecard in self.gst_scorecard:
            #models get incrementing numbers
            s_model_name = st_scorecard.get_model_name()
            if s_model_name not in d_legend:
                n_cnt_model += 1
                d_legend[s_model_name] = f"{n_cnt_model}"
                logging.info(f"New model {s_model_name} | {n_cnt_model}")
            #runtimes get incrementing letters
            s_runtime_name = st_scorecard.get_runtime_name()
            if s_runtime_name not in d_legend:
                n_cnt_runtime += 1
                d_legend[s_runtime_name] = f"{chr(ord('A')+n_cnt_runtime-1)}"
                logging.info(f"New runtime {s_runtime_name} | {n_cnt_runtime}")
        return d_legend

    #--------------------------------------------------------------------------------------------
    #   Private helpers (deduplicate the chart builders)
    #--------------------------------------------------------------------------------------------

    def _get_labels(self, st_scorecard, i_d_legend : Dict):
        """
        Resolve the short legend labels for a scorecard's model and runtime.
        Returns: (model_label, runtime_label); "ERR" when a name is not in the legend.
        """
        s_model_label = i_d_legend.get(st_scorecard.get_model_name(), "ERR")
        s_runtime_label = i_d_legend.get(st_scorecard.get_runtime_name(), "ERR")
        return s_model_label, s_runtime_label

    def _make_row(self, n_x : float, n_y_avg : float, n_y_std : float, s_label : str, s_color : str, n_x_std : float = 0.0) -> "St_chart_row":
        """
        Build a single chart row (bar/dot) and log it at debug level.
        """
        st_row : St_chart_row = St_chart_row()
        st_row.n_axis_x = n_x
        st_row.n_axis_x_std = n_x_std
        st_row.n_axis_y_avg = n_y_avg
        st_row.n_axis_y_std = n_y_std
        st_row.s_label = s_label
        st_row.s_color = s_color
        logging.debug(f"{st_row}")
        return st_row

    def _build_legend_handles(self, i_d_legend : Dict, i_s_legend_type : str) -> List:
        """
        Convert the legend dictionary into matplotlib Line2D handles,
        sorted by short label, optionally appending the "sma" or "qa" color keys.
        """
        lst_legend : List[plt.Line2D] = list()
        # Scan the legend and sort by value
        for s_key, s_value in sorted(i_d_legend.items(), key=lambda item: item[1]):
            st_legend = plt.Line2D([], [], linestyle='', label=f'{s_value}: {s_key}')
            lst_legend.append(st_legend)
        #optional color-key entries
        if i_s_legend_type == "sma":
            self.add_sma_match_score_accuracy_to_legend( lst_legend )
        elif i_s_legend_type == "qa":
            self.add_qa_question_answer_to_legend( lst_legend )
        return lst_legend

    def _draw_legend_and_save(self, i_s_name_chart : str, i_lst_legend : List) -> None:
        """
        Draw the legend outside the plot area, enable the grid, save the
        current figure as "output_chart/<name>.png" and close it.
        """
        plt.legend(
            handles=i_lst_legend,
            labels=[legend.get_label() for legend in i_lst_legend],
            labelcolor = [legend.get_color() for legend in i_lst_legend],
            handler_map={},
            bbox_to_anchor=(1.2, 0.75),
            loc='lower center'
        )
        plt.grid(True)
        # Save the plot as a PNG file
        s_path = path.join( "output_chart", f"{i_s_name_chart}.png" )
        plt.savefig(s_path, bbox_inches='tight')
        plt.close()

    #--------------------------------------------------------------------------------------------
    #   Generic Plot Generators
    #--------------------------------------------------------------------------------------------

    def generate_plot_log(self, i_s_name_chart : str, i_s_name_axis_x : str, i_s_name_axis_y : str, i_lst_data : List["St_chart_row"], i_d_legend : Dict, i_s_legend_type : str ) -> bool:
        """
        Generic bar chart on a base-2 logarithmic Y axis.
        Rows with a tiny Y spread are drawn as dots; the others as bars whose
        height is the Y standard deviation and width the X standard deviation.
        Returns: False on success.
        """
        # Create plot
        plt.figure(figsize=(12, 8))
        plt.title(f"{i_s_name_axis_x} (x) VS {i_s_name_axis_y} (Y)")
        plt.xlabel(i_s_name_axis_x)
        plt.ylabel(i_s_name_axis_y)
        #Logarithmic chart
        plt.yscale('log')
        yticker = ticker.LogLocator(base=2, numticks=20)
        plt.gca().yaxis.set_major_locator(yticker)
        plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.0f}'))
        for st_row in i_lst_data:
            #if the bar is too small, draw a dot instead
            if st_row.n_axis_y_std < abs(st_row.n_axis_y_avg)/1000.0:
                plt.plot(st_row.n_axis_x, st_row.n_axis_y_avg, "o", color=st_row.s_color )
            #draw the bar where height represents the standard deviation
            else:
                n_bar_height = st_row.n_axis_y_std
                n_bar_bottom = st_row.n_axis_y_avg -st_row.n_axis_y_std/2
                #use std information to change width, clamped to a visible minimum
                x_std = max(st_row.n_axis_x_std, 0.05)
                plt.bar(st_row.n_axis_x, n_bar_height, width=x_std, bottom=n_bar_bottom, color=st_row.s_color, alpha = 0.5)
            plt.text(st_row.n_axis_x, st_row.n_axis_y_avg, st_row.s_label, fontsize=12, ha='center')
        lst_legend = self._build_legend_handles( i_d_legend, i_s_legend_type )
        self._draw_legend_and_save( i_s_name_chart, lst_legend )
        return False #OK

    def generate_plot_probability(self, i_s_name_chart : str, i_s_name_axis_x : str, i_s_name_axis_y : str, i_lst_data : List["St_chart_row"], i_d_legend : Dict, i_s_legend_type : str ) -> bool:
        """
        Generic bar chart on a fixed linear Y axis from 0 to 1 (probabilities).
        Rows with a tiny Y spread are drawn as dots; the others as fixed-width
        bars whose height is the Y standard deviation.
        Returns: False on success.
        """
        # Create plot
        plt.figure(figsize=(12, 8))
        plt.title(f"{i_s_name_axis_x} (x) VS {i_s_name_axis_y} (Y)")
        plt.xlabel(i_s_name_axis_x)
        plt.ylabel(i_s_name_axis_y)
        # Set the Y-axis range from 0 to 1
        plt.ylim(0, 1)
        plt.gca().yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:.3f}'))
        for st_row in i_lst_data:
            #if the bar is too small, draw a dot instead
            if st_row.n_axis_y_std < abs(st_row.n_axis_y_avg)/1000.0:
                plt.plot(st_row.n_axis_x, st_row.n_axis_y_avg, "o", color=st_row.s_color )
            #draw the bar where height represents the standard deviation
            else:
                n_bar_height = st_row.n_axis_y_std
                n_bar_bottom = st_row.n_axis_y_avg -st_row.n_axis_y_std/2
                plt.bar(st_row.n_axis_x, n_bar_height, width=0.1, bottom=n_bar_bottom, color=st_row.s_color)
            plt.text(st_row.n_axis_x, st_row.n_axis_y_avg, st_row.s_label, fontsize=12, ha='center')
        lst_legend = self._build_legend_handles( i_d_legend, i_s_legend_type )
        self._draw_legend_and_save( i_s_name_chart, lst_legend )
        return False #OK

    #--------------------------------------------------------------------------------------------
    #   Specific Charts XY
    #--------------------------------------------------------------------------------------------

    def generate_x_size_y_speed_tps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: model size on disk [GB] (X) against token speed [T/s] (Y).
        Question speed (time to first token) is green, answer speed is pink.
        Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            s_label = f"{s_model_label}.{s_runtime_label}"
            n_size_gb = st_scorecard.st_stat_model.n_size_on_disk_gb
            #green: question token speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_question_token_per_second.n_avg, st_scorecard.n_speed_question_token_per_second.n_std, s_label, "green" ))
            #pink: answer token speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_answer_token_per_second.n_avg, st_scorecard.n_speed_answer_token_per_second.n_std, s_label, "pink" ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_size_vs_speed_tps", "Model Size [GB]", "Speed [T/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_size_y_speed_cps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: model size on disk [GB] (X) against character speed [C/s] (Y).
        Question speed (time to first token) is green, answer speed is pink.
        Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            s_label = f"{s_model_label}.{s_runtime_label}"
            n_size_gb = st_scorecard.st_stat_model.n_size_on_disk_gb
            #green: question character speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_question_char_per_second.n_avg, st_scorecard.n_speed_question_char_per_second.n_std, s_label, "green" ))
            #pink: answer character speed
            lst_row.append(self._make_row( n_size_gb, st_scorecard.n_speed_answer_char_per_second.n_avg, st_scorecard.n_speed_answer_char_per_second.n_std, s_label, "pink" ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_size_vs_speed_cps", "Model Size [GB]", "Speed [C/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_size_y_accuracy(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: model size on disk [GB] (X) against the three accuracies (Y):
        structure (orange), match (green) and accuracy (cyan).
        Linear Y scale fixed from 0 to 1. Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            s_label = f"{s_model_label}.{s_runtime_label}"
            n_size_gb = st_scorecard.st_stat_model.n_size_on_disk_gb
            #one row per score, color keyed via the "sma" legend
            lt_score_color = (
                (st_scorecard.n_score_structure, "orange"),
                (st_scorecard.n_score_match, "green"),
                (st_scorecard.n_score_accuracy, "cyan"),
            )
            for st_score, s_color in lt_score_color:
                lst_row.append(self._make_row( n_size_gb, st_score.n_avg, st_score.n_std, s_label, s_color ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_probability( "model_size_vs_accuracy", "Model Size [GB]", "Accuracy [%]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_accuracy_y_speed_tps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: accuracy (X) against token speed [T/s] (Y).
        For each of the three scores (structure orange, match green, accuracy
        cyan), two rows are emitted: question speed ("Qst") and answer speed
        ("Ans"). Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            lt_score_color = (
                (st_scorecard.n_score_structure, "orange"),
                (st_scorecard.n_score_match, "green"),
                (st_scorecard.n_score_accuracy, "cyan"),
            )
            for st_score, s_color in lt_score_color:
                #question speed (time to first token)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_question_token_per_second.n_avg, st_scorecard.n_speed_question_token_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nQst", s_color, n_x_std=st_score.n_std ))
                #answer speed (token generation)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_answer_token_per_second.n_avg, st_scorecard.n_speed_answer_token_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nAns", s_color, n_x_std=st_score.n_std ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_accuracy_vs_speed_tps", "Accuracy [%]", "Speed [T/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail

    def generate_x_accuracy_y_speed_cps(self, i_d_legend : Dict, i_s_legend_type : str = "" ) -> bool:
        """
        Chart: accuracy (X) against character speed [C/s] (Y).
        For each of the three scores (structure orange, match green, accuracy
        cyan), two rows are emitted: question speed ("Qst") and answer speed
        ("Ans"). Returns: False on success.
        """
        lst_row : List["St_chart_row"] = list()
        for st_scorecard in self.gst_scorecard:
            s_model_label, s_runtime_label = self._get_labels( st_scorecard, i_d_legend )
            lt_score_color = (
                (st_scorecard.n_score_structure, "orange"),
                (st_scorecard.n_score_match, "green"),
                (st_scorecard.n_score_accuracy, "cyan"),
            )
            for st_score, s_color in lt_score_color:
                #question speed (time to first token)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_question_char_per_second.n_avg, st_scorecard.n_speed_question_char_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nQst", s_color, n_x_std=st_score.n_std ))
                #answer speed (character generation)
                lst_row.append(self._make_row( st_score.n_avg, st_scorecard.n_speed_answer_char_per_second.n_avg, st_scorecard.n_speed_answer_char_per_second.n_std, f"{s_model_label}.{s_runtime_label}\nAns", s_color, n_x_std=st_score.n_std ))
        #Ask the chart generator to do its thing
        x_fail = self.generate_plot_log( "model_accuracy_vs_speed_cps", "Accuracy [%]", "Speed [C/s]", lst_row, i_d_legend, i_s_legend_type )
        return x_fail
#if executed directly, run the test
if __name__ == "__main__":
    # Setup logging; "logging" is already imported at the top of the module,
    # so the redundant in-guard re-imports were removed.
    logging.basicConfig(
        filename="debug.log",
        #level=logging.DEBUG,
        level=logging.INFO,
        format='[%(asctime)s] %(levelname)s %(module)s:%(lineno)d > %(message)s',
        filemode='w'
    )
    logging.info("BEGIN")
    cl_generator = Cl_generate_chart()
    cl_generator.load_scorecard()
    d_legend = cl_generator.generate_plot_legend()
    logging.info(f"LEGEND: {d_legend}")
    #Speed VS Model size charts
    cl_generator.generate_x_size_y_speed_cps( d_legend, "qa" )
    cl_generator.generate_x_size_y_speed_tps( d_legend, "" )
    cl_generator.generate_x_size_y_accuracy( d_legend, "sma" )
    #Accuracy vs speed chart
    cl_generator.generate_x_accuracy_y_speed_tps( d_legend, "sma" )
    cl_generator.generate_x_accuracy_y_speed_cps( d_legend, "sma" )
    logging.info("END")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment