Skip to content

Commit

Permalink
fix leaderboard
Browse files Browse the repository at this point in the history
  • Loading branch information
xiamengzhou committed Jul 17, 2024
1 parent b5b8e18 commit 2ee79f7
Showing 1 changed file with 5 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Nanbeige2-16B-Chat,37.03608605005168,1.4340261272580377,288,514,3,805,35.9627329
claude-3-opus-20240229,29.10526953334248,1.3941539442369442,223,579,3,805,27.888198757763977,minimal,1388,40.5095080124761,
SPPO-Llama-3-Instruct-8B-PairRM,39.67286090605648,1.424722356202499,310,494,1,805,38.57142857142858,community,2066,38.56280663670214,0.8694594533275739
gpt4,23.576789314782605,1.275704201206918,179,618,8,805,22.732919254658384,verified,1365,38.12808974440021,
Infinity-Instruct-3M-0625-Llama3-70B,24.277231851026183,1.3152941480778837,188,613,4,805,23.60248447204969,community,1294,37.97881098506053,0.8189316873655579
aligner-2b_qwen1.5-72b-chat,31.773037737123104,1.2392772646245978,180,473,152,805,31.801242236024844,community,1812,36.725868878524274,
Qwen1.5-72B-Chat,26.49828339562733,1.304236164893057,201,600,4,805,25.217391304347824,verified,1549,36.571754111987296,
gpt4_0314,22.073258928708075,1.2466725494608204,172,627,6,805,21.73913043478261,verified,1371,35.30706121640206,
Expand All @@ -38,6 +39,7 @@ SPPO-Mistral7B-PairRM-ExPO,35.4431306716895,1.398130896602677,274,531,0,805,34.0
merlinite-7B-AOT,29.89635084070223,1.3666520485228832,234,571,0,805,29.068322981366464,community,1855,31.721885287042845,0.8150560619387706
Infinity-Instruct-3M-0613-Llama3-70B,19.265008711394984,1.1892676587571642,143,658,4,805,18.012422360248447,community,1192,31.525606214845013,0.7743157557419648
Samba-CoE-v0.2-best-of-16,26.988254318335404,1.3189030000371738,201,601,3,805,25.15527950310559,community,1578,31.506544268148147,
Infinity-Instruct-3M-0625-Mistral-7B,21.087714332440324,1.2475078604303778,165,638,2,805,20.62111801242236,community,1305,31.42101004652769,0.7967814834411304
REBEL-Llama-3-8B-Instruct,34.30642383142354,1.3914900255573264,268,537,0,805,33.29192546583851,community,2372,31.40409226280724,
Mixtral-8x22B-Instruct-v0.1,22.21017054750302,1.2780740057417268,174,628,3,805,21.801242236024844,verified,1445,30.878810294279383,
SPPO-Mistral7B-PairRM,32.2453123637764,1.3908000109577154,249,556,0,805,30.93167701863354,community,2114,30.494137965217423,
Expand All @@ -49,6 +51,7 @@ pairrm-Yi-34B-Chat,31.24128294680746,1.34824373994879,239,563,3,805,29.875776397
mistral-medium,21.855772543652176,1.2682402187223842,164,639,2,805,20.496894409937887,verified,1500,28.614337401726104,
claude-2,17.188240356708075,1.17482825615589,131,673,1,805,16.335403726708076,verified,1069,28.155196141629148,
Samba-CoE-v0.2,21.847378669267083,1.2171089783436106,159,645,1,805,19.81366459627329,community,1469,27.62426735006872,
Infinity-Instruct-3M-0625-Llama3-8B,19.364378673728307,1.1904760520311013,147,655,3,805,18.4472049689441,community,1336,27.518835489680203,0.7623277408602316
claude,16.98534361236025,1.1687959793014906,129,676,0,805,16.024844720496894,verified,1082,27.289504443727107,
internlm2-chat-20b-ExPO,46.185367468861,1.4638315245977938,375,430,0,805,46.58385093167702,community,3335,27.225759480731792,
Yi-34B-Chat,29.65994671879504,1.3225712597906096,219,582,4,805,27.45341614906832,verified,2123,27.19054787762733,
Expand All @@ -73,10 +76,12 @@ gpt-3.5-turbo-16k-0613,14.13239070746584,1.027579400264853,96,704,5,805,12.23602
internlm2-chat-7b-ExPO,28.067817437082898,1.3159792318125112,209,595,1,805,26.02484472049689,community,2390,22.66748024879648,
gpt-3.5-turbo-0613,14.09579857390062,1.0371186215049395,99,700,6,805,12.670807453416147,community,1331,22.35251298054288,
gpt-3.5-turbo-1106_verbose,12.76316981026087,1.044246819212278,94,709,2,805,11.801242236024844,dev,1058,22.00093702171442,
Infinity-Instruct-3M-0625-Qwen2-7B,15.322182555525842,1.0986373100856872,118,685,2,805,14.782608695652174,community,1315,21.87399673499932,0.6990992627857084
gpt4_0613_concise,9.400320574596272,0.901021275896262,71,729,5,805,9.130434782608695,dev,627,21.57799091454269,
pairrm-tulu-2-70b,18.638962967441,1.1924966700012911,140,665,0,805,17.391304347826086,community,1607,21.428403975507223,
tulu-2-dpo-70b,15.982854374136648,1.1457861368237434,119,683,3,805,14.96894409937888,verified,1418,21.238610038371124,
Mistral-7B-ReMax-v0.1,15.999331369031056,1.1288683901451453,120,683,2,805,15.031055900621118,community,1478,20.55136770233589,
Infinity-Instruct-3M-0625-Yi-1.5-9B,16.203844277153284,1.1057840624447524,123,681,1,805,15.341614906832298,community,1449,20.538372631222003,0.6401022229216694
Starling-LM-7B-alpha-ExPO,18.17975592036216,1.2498324795896385,148,657,0,805,18.385093167701864,community,1821,19.4741654606294,
gpt-3.5-turbo-1106,9.177964561962735,0.8904117511864436,64,737,4,805,8.198757763975156,verified,796,19.30058903498905,
LMCocktail-10.7B-v1,13.153430917391304,1.045719535661201,104,700,1,805,12.981366459627331,community,1203,18.950710386651053,
Expand Down

0 comments on commit 2ee79f7

Please sign in to comment.