From fccfd4e6baab23921f26c33cdb698de0c1954509 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Wed, 23 Oct 2024 17:03:22 +0000 Subject: [PATCH] Automated leaderboard update --- .../weighted_alpaca_eval_gpt4_turbo_leaderboard.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv b/docs/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv index 32065a1d..19bcdce4 100644 --- a/docs/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv +++ b/docs/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv @@ -1,4 +1,5 @@ name,length_controlled_winrate,win_rate,avg_length,link,samples,filter +NullModel (adversarial),86.45780691307947,76.91979180386511,872,https://github.com/sail-sg/Cheating-LLM-Benchmarks/,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/NullModel/model_outputs.json,community SelfMoA + gemma-2-9b-it-WPO-HB,78.53928111481099,77.58955217385297,3261,https://github.com/wenzhe-li/Self-MoA/,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/SelfMoA_gemma-2-9b-it-WPO-HB/model_outputs.json,community Shopee SlimMoA v1,77.4515432873834,75.6142865980535,1994,https://github.com/LLM-Alignment-sh/Shopee-SlimMoA,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/Shopee-SlimMoA-v1/model_outputs.json,community Blendax.AI-gm-l6-vo31,76.91981221023656,69.11033492869565,1809,https://www.blendax.ai/post/blendaxai-gm-l6-vo31,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/blendaxai-gm-l6-vo31/model_outputs.json,community