From 6537bb7dddab6e0d03077907192433e0e56eb636 Mon Sep 17 00:00:00 2001 From: Yann Dubois Date: Wed, 8 Nov 2023 01:45:41 -0800 Subject: [PATCH] gpt4 turbo -> minimal --- docs/alpaca_eval_gpt4_leaderboard.csv | 2 +- .../data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/alpaca_eval_gpt4_leaderboard.csv b/docs/alpaca_eval_gpt4_leaderboard.csv index 89f5041a..38b89514 100644 --- a/docs/alpaca_eval_gpt4_leaderboard.csv +++ b/docs/alpaca_eval_gpt4_leaderboard.csv @@ -1,5 +1,5 @@ name,win_rate,avg_length,link,samples,filter -GPT-4 Turbo,97.69900497512438,2049,,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/gpt4_turbo/model_outputs.json,community +GPT-4 Turbo,97.69900497512438,2049,,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/gpt4_turbo/model_outputs.json,minimal XwinLM 70b V0.1,95.56803995,1775,https://github.com/Xwin-LM/Xwin-LM,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/xwinlm-70b-v0.1/model_outputs.json,community GPT-4,95.27950311,1365,,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/gpt4/model_outputs.json,minimal LLaMA2 Chat 70B,92.66169154,1790,https://ai.meta.com/llama/,https://github.com/tatsu-lab/alpaca_eval/blob/main/results/llama-2-70b-chat-hf/model_outputs.json,minimal diff --git a/src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv b/src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv index 35f9ee3d..a8b36954 100644 --- a/src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv +++ b/src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv @@ -1,5 +1,5 @@ ,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,mode,avg_length -gpt4_turbo,97.69900497512438,0.5104849118311993,783,16,5,804,community,2049 +gpt4_turbo,97.69900497512438,0.5104849118311993,783,16,5,804,minimal,2049 xwinlm-70b-v0.1,95.56803995,0.724941926,765,35,1,801,community,1775 gpt4,95.27950311,0.71628144,761,32,12,805,minimal,1365 llama-2-70b-chat-hf,92.66169154,0.911762258,743,57,4,804,minimal,1790