Source code for tabensemb.utils.ranking
import os
import pandas as pd
from typing import List, Union
[docs]
def read_lbs(paths: List[Union[os.PathLike, str]]) -> List[pd.DataFrame]:
    """
    Load several .csv files into data frames.

    Parameters
    ----------
    paths
        Paths to the .csv files to load. Each file is read with its first column used as the index.

    Returns
    -------
    list
        One pd.DataFrame per entry of ``paths``, in the same order.
    """
    return [pd.read_csv(csv_path, index_col=0) for csv_path in paths]
[docs]
def merge_leaderboards(dfs: List[pd.DataFrame]):
    """
    Stack multiple leaderboards into one and sort it by a metric column.

    The name of the third column is split on spaces and its last token is taken as the metric name. The merged
    leaderboard is sorted in ascending order by ``"Testing <metric>"`` when such a column exists, otherwise by
    ``"<metric>"`` itself.

    Parameters
    ----------
    dfs
        Leaderboards to concatenate. Columns from the third one onward are treated as metric columns.

    Returns
    -------
    pd.DataFrame
        The concatenated leaderboard, sorted and re-indexed from zero.
    """
    merged = pd.concat(dfs, ignore_index=True)
    # Everything after the first two columns is a metric column; only the first of them picks the sort key.
    leading_metric_column = merged.columns.tolist()[2]
    metric_name = leading_metric_column.rsplit(" ", 1)[-1]
    testing_column = f"Testing {metric_name}"
    if testing_column in merged.columns:
        sort_key = testing_column
    else:
        sort_key = metric_name
    ordered = merged.sort_values(by=sort_key, ascending=True)
    return ordered.reset_index(drop=True)
[docs]
def avg_rank(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Calculate average rankings for all models in all model bases based on leaderboards from multiple executions.

    The rank of a model in one leaderboard is the index label of its row plus one, so each leaderboard is expected
    to carry a zero-based integer index that reflects its ordering. If a ``(Program, Model)`` pair occurs more than
    once in a leaderboard, its first occurrence is used. A model missing from a leaderboard gets NaN in that
    leaderboard's rank column and is ignored there when averaging.

    Parameters
    ----------
    dfs
        A list of leaderboards from multiple executions. Each must have "Program" and "Model" columns.

    Returns
    -------
    pd.DataFrame
        A leaderboard of average ranking of multiple executions, with columns "Program", "Model", one
        "Rank {i}" column per input leaderboard, and "Avg Rank". Rows are sorted by "Avg Rank" in ascending order;
        ties keep the order in which the models were first seen.
    """
    # One {(program, model): rank} mapping per leaderboard gives constant-time lookups below.
    rank_maps = []
    for df in dfs:
        ranks = {}
        pairs = zip(df["Program"], df["Model"])
        for pair, index_label in zip(pairs, df.index):
            # setdefault keeps the first occurrence of a duplicated pair.
            ranks.setdefault(pair, index_label + 1)
        rank_maps.append(ranks)

    # dict.fromkeys de-duplicates while preserving first-seen order, so the output is deterministic.
    all_program_models = list(
        dict.fromkeys(pair for ranks in rank_maps for pair in ranks)
    )

    avg_df = pd.DataFrame(columns=["Program", "Model"])
    avg_df["Program"] = [program for program, _ in all_program_models]
    avg_df["Model"] = [model for _, model in all_program_models]

    rank_columns = []
    for df_idx, ranks in enumerate(rank_maps):
        column = f"Rank {df_idx}"
        rank_columns.append(column)
        # Always create the column (as float, NaN for absent models), even when the leaderboard contributes
        # no rank at all; otherwise the column selection below would raise KeyError.
        avg_df[column] = pd.Series(
            [ranks.get(pair, float("nan")) for pair in all_program_models],
            index=avg_df.index,
            dtype=float,
        )

    avg_df["Avg Rank"] = avg_df[rank_columns].mean(axis=1)
    # A stable sort keeps tied models in first-seen order.
    avg_df.sort_values(by="Avg Rank", ascending=True, inplace=True, kind="mergesort")
    avg_df.reset_index(drop=True, inplace=True)
    return avg_df
[docs]
def merge_to_excel(
    path: Union[os.PathLike, str],
    dfs: List[pd.DataFrame],
    avg_df: pd.DataFrame,
    sheet_names: Union[List[str], None] = None,
    **kwargs,
):
    """
    Write leaderboards from multiple executions and the leaderboard of average ranking of multiple executions to a
    .xlsx file.

    Parameters
    ----------
    path
        The path to write the .xlsx file.
    dfs
        Leaderboards from multiple executions.
    avg_df
        The leaderboard of average ranking of multiple executions. See :func:`avg_rank`.
    sheet_names
        Names of ``dfs`` and ``avg_df``. If None, the sheets of ``dfs`` are named "Mode 0", "Mode 1", ... and the
        sheet of ``avg_df`` is named "Average". If it has exactly ``len(dfs) + 1`` entries, the last entry names
        the sheet of ``avg_df``. Otherwise the names are paired with ``dfs`` in order (a leaderboard without a
        paired name is not written) and the sheet of ``avg_df`` is named "Average". The passed list is not
        modified.
    kwargs
        Arguments for ``pd.DataFrame.to_excel``.
    """
    avg_sheet_name = "Average"
    if sheet_names is None:
        sheet_names = [f"Mode {x}" for x in range(len(dfs))]
    elif len(sheet_names) == len(dfs) + 1:
        # Slice instead of pop() so the caller's list is left untouched and can be reused.
        avg_sheet_name = sheet_names[-1]
        sheet_names = sheet_names[:-1]
    with pd.ExcelWriter(path) as writer:
        for df, name in zip(dfs, sheet_names):
            df.to_excel(writer, sheet_name=name, **kwargs)
        avg_df.to_excel(writer, sheet_name=avg_sheet_name, **kwargs)