Commit
Merge pull request #5 from miragecoa/main
Normalize all columns using column-wise min-max normalization
miragecoa authored Sep 10, 2024
2 parents 8b37d0c + d8cfaee commit b98ed26
Showing 5 changed files with 45 additions and 20 deletions.
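For context, the normalization introduced by this commit (see src/populate.py below) is a column-wise min-max rescale: each numeric leaderboard column is mapped onto a 0-100 range using that column's own minimum and maximum, with non-numeric entries coerced to 0. A minimal standalone sketch of the idea, assuming pandas; the column name and values are illustrative:

import pandas as pd

# Toy stand-in for one leaderboard score column.
df = pd.DataFrame({"Average IE ⬆️": [0.2, "missing", 0.8]})

col = "Average IE ⬆️"
df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)  # non-numeric -> 0
lo, hi = df[col].min(), df[col].max()
# Guard against a constant column, then rescale to 0-100.
df[col] = 100 if hi == lo else (df[col] - lo) / (hi - lo) * 100
print(df[col].tolist())  # [25.0, 0.0, 100.0]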
app.py: 8 changes (0 additions, 8 deletions)
@@ -101,16 +101,8 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
AutoEvalColumn.model_type_symbol.name,
AutoEvalColumn.model.name,
]

# Ensure no duplicates and add the new average columns
unique_columns = set(always_here_cols + columns)

# We use COLS to maintain sorting
filtered_df = df[[c for c in COLS if c in df.columns and c in unique_columns]]

# Debugging print to see if the new columns are included
print(f"Columns included in DataFrame: {filtered_df.columns.tolist()}")

return filtered_df


requirements.txt: 2 changes (2 additions, 0 deletions)
@@ -15,4 +15,6 @@ transformers==4.35.2
tokenizers>=0.15.0
git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
accelerate==0.24.1
+ pydantic==2.9.1
+ fastapi==0.112.4
sentencepiece
src/display/utils.py: 18 changes (9 additions, 9 deletions)
@@ -29,14 +29,14 @@ class ColumnContent:
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, category="Model Information", never_hidden=True)])

auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True, category="Model Information")])
auto_eval_column_dict.append(["average_IE", ColumnContent, ColumnContent("Average IE ⬆️", "number", False, category="Information Extraction (IE)")])
auto_eval_column_dict.append(["average_TA", ColumnContent, ColumnContent("Average TA ⬆️", "number", False, category="Textual Analysis (TA)")])
auto_eval_column_dict.append(["average_QA", ColumnContent, ColumnContent("Average QA ⬆️", "number", False, category="Question Answering (QA)")])
auto_eval_column_dict.append(["average_TG", ColumnContent, ColumnContent("Average TG ⬆️", "number", False, category="Text Generation (TG)")])
auto_eval_column_dict.append(["average_RM", ColumnContent, ColumnContent("Average RM ⬆️", "number", False, category="Risk Management (RM)")])
auto_eval_column_dict.append(["average_FO", ColumnContent, ColumnContent("Average FO ⬆️", "number", False, category="Forecasting (FO)")])
auto_eval_column_dict.append(["average_DM", ColumnContent, ColumnContent("Average DM ⬆️", "number", False, category="Decision-Making (DM)")])
auto_eval_column_dict.append(["average_Spanish", ColumnContent, ColumnContent("Average Spanish ⬆️", "number", False, category="Spanish")])
auto_eval_column_dict.append(["average_IE", ColumnContent, ColumnContent("Average IE ⬆️", "number", True, category="Information Extraction (IE)")])
auto_eval_column_dict.append(["average_TA", ColumnContent, ColumnContent("Average TA ⬆️", "number", True, category="Textual Analysis (TA)")])
auto_eval_column_dict.append(["average_QA", ColumnContent, ColumnContent("Average QA ⬆️", "number", True, category="Question Answering (QA)")])
auto_eval_column_dict.append(["average_TG", ColumnContent, ColumnContent("Average TG ⬆️", "number", True, category="Text Generation (TG)")])
auto_eval_column_dict.append(["average_RM", ColumnContent, ColumnContent("Average RM ⬆️", "number", True, category="Risk Management (RM)")])
auto_eval_column_dict.append(["average_FO", ColumnContent, ColumnContent("Average FO ⬆️", "number", True, category="Forecasting (FO)")])
auto_eval_column_dict.append(["average_DM", ColumnContent, ColumnContent("Average DM ⬆️", "number", True, category="Decision-Making (DM)")])
auto_eval_column_dict.append(["average_Spanish", ColumnContent, ColumnContent("Average Spanish ⬆️", "number", True, category="Spanish")])

auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, category="Model Information")])
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False, category="Model Information")])
@@ -49,7 +49,7 @@ class ColumnContent:
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, category="Model Information", hidden=False)])

for task in Tasks:
- auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True, category=task.value.category)])
+ auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", False, category=task.value.category)])

# We use make_dataclass to dynamically fill the scores from Tasks
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
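For readers unfamiliar with the pattern in the context lines above: each entry appended to auto_eval_column_dict is an (attribute name, type, default value) triple, and dataclasses.make_dataclass turns the list into a frozen class whose class-level defaults are the ColumnContent instances. A minimal sketch of the mechanism, with plain strings standing in for ColumnContent (field names are illustrative, not the leaderboard's full list):

from dataclasses import make_dataclass

# Each triple is (attribute_name, type, default_value).
columns = [
    ("model", str, "Model"),
    ("average", str, "Average ⬆️"),
]
AutoEvalColumnSketch = make_dataclass("AutoEvalColumnSketch", columns, frozen=True)

print(AutoEvalColumnSketch().model)    # -> Model
print(AutoEvalColumnSketch().average)  # -> Average ⬆️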
src/leaderboard/read_evals.py: 2 changes (1 addition, 1 deletion)
@@ -158,7 +158,7 @@ def to_dict(self):
category_averages["average_FO"].append(score)
elif task.value.category == "Decision-Making (DM)":
if task.value.benchmark == "FinTrade" and score != "missing":
category_averages["average_DM"].append((score + 3)/6)
category_averages["average_DM"].append((score + 300)/6)
else:
category_averages["average_DM"].append(score)
elif task.value.category == "Spanish":
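Worked example of the change above: a raw FinTrade score of 0 maps to (0 + 3) / 6 = 0.5 under the old formula, but to (0 + 300) / 6 = 50 under the new one, i.e. onto a 0-100 style value rather than a 0-1 fraction, presumably so it lines up with the column-wise 0-100 normalization added in src/populate.py.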
src/populate.py: 35 changes (33 additions, 2 deletions)
@@ -32,6 +32,37 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm

df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

def normalize_column(df: pd.DataFrame, col: str):
"""Normalize a column to a 0-100 range based on its min and max values.
Non-numeric values will be treated as 0."""
# Convert non-numeric values to 0
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

min_val = df[col].min()
max_val = df[col].max()

# Avoid division by zero if max == min
if max_val != min_val:
df[col] = df[col].apply(lambda x: ((x - min_val) / (max_val - min_val)) * 100)
else:
df[col] = 100 # if all values are the same, set them to 100 (since they are all "max")

def normalize_all_columns(df: pd.DataFrame):
"""Normalize all columns in the DataFrame to a 0-100 range, skipping boolean and string columns."""
for col in df.columns:
if pd.api.types.is_bool_dtype(df[col]):
continue
elif pd.api.types.is_string_dtype(df[col]):
continue
elif pd.api.types.is_numeric_dtype(df[col]):
normalize_column(df, col)
return df

# Example usage
df = normalize_all_columns(df)

'''
print(df.columns)
# Apply the transformation for MCC values
mcc_tasks = ["German", "Australian", "LendingClub", "ccf", "ccfraud", "polish", "taiwan", "portoseguro", "travelinsurance"]
for task in mcc_tasks:
@@ -40,8 +71,8 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
for index, row in df.iterrows():
if "FinTrade" in row and row["FinTrade"] != "missing":
df.loc[index, "FinTrade"] = (row["FinTrade"] + 3) / 6

df.loc[index, "FinTrade"] = (row["FinTrade"] + 300) / 6
'''
# Now, select the columns that were passed to the function
df = df[cols]
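A self-contained sketch of how the new normalization behaves across mixed column types (the helpers above appear to be defined inside get_leaderboard_df, so the same min-max logic is restated inline here; column names and values are illustrative):

import pandas as pd

toy = pd.DataFrame({
    "Model": ["model-a", "model-b"],        # string column -> skipped
    "Available on the hub": [True, False],  # boolean column -> skipped
    "Average ⬆️": [0.25, 0.75],             # numeric -> rescaled to 0 and 100
    "FPB": [60.0, 60.0],                    # constant column -> every row becomes 100
})

for col in toy.columns:
    if pd.api.types.is_bool_dtype(toy[col]) or pd.api.types.is_string_dtype(toy[col]):
        continue
    if pd.api.types.is_numeric_dtype(toy[col]):
        toy[col] = pd.to_numeric(toy[col], errors="coerce").fillna(0)
        lo, hi = toy[col].min(), toy[col].max()
        toy[col] = 100 if hi == lo else (toy[col] - lo) / (hi - lo) * 100

print(toy["Average ⬆️"].tolist())  # [0.0, 100.0]
print(toy["FPB"].tolist())         # [100, 100]
print(toy["Model"].tolist())       # ['model-a', 'model-b'] (unchanged)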

