-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
send elo file to public website repo
- Loading branch information
Showing
1 changed file
with
1 addition
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
{"date": "2023-06-28T18:10:55.509719", "model_metadata": {"gpt-3.5-turbo-0301:20230614": {"model_id": "gpt-3.5-turbo-0301:20230614", "path": "answers/rakuda_v1/gpt3.jsonl"}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-ppo", "path": "answers/rakuda_v1/rinna-ppo.jsonl"}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "path": "answers/rakuda_v1/rinna-sft.jsonl"}, "rinna/japanese-gpt-neox-3.6b": {"model_id": "rinna/japanese-gpt-neox-3.6b", "path": "answers/rakuda_v1/rinna.jsonl"}, "izumi-lab/stormy-7b-10ep": {"model_id": "izumi-lab/stormy-7b-10ep", "path": "answers/rakuda_v1/stormy.jsonl"}, "cyberagent/open-calm-7b": {"model_id": "cyberagent/open-calm-7b", "path": "answers/rakuda_v1/calm.jsonl"}}, "metadata": {"questions_path": "questions/rakuda_v1.jsonl", "reviewer_path": "prompts/rakuda_reviewer.jsonl", "prompt_path": "prompts/rakuda_prompt.jsonl"}, "ranking": [{"model_id": "rinna/japanese-gpt-neox-3.6b", "short_name": "rinna-3.6b", "median": -0.7503310531398228, "one_sigma_down": 0.10017338424683819, "one_sigma_up": 0.0991624823148497, "win_rate": 0.35125, "stronger_than_next_confidence": 0, "implied_win_probability": 0}, {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "short_name": "rinna-3.6b-SFT", "median": -0.7188850623809692, "one_sigma_down": 0.10084014078765635, "one_sigma_up": 0.09975039004731556, "win_rate": 0.35875, "stronger_than_next_confidence": 0.5853008474576271, "implied_win_probability": 0.5078608499333815}, {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-ppo", "short_name": "rinna-3.6b-PPO", "median": -0.5753167201785611, "one_sigma_down": 0.09831405058106879, "one_sigma_up": 0.09829877789967173, "win_rate": 0.3925, "stronger_than_next_confidence": 0.8377531779661017, "implied_win_probability": 0.5358305621120212}, {"model_id": "izumi-lab/stormy-7b-10ep", "short_name": "stormy-7b", "median": -0.3835133333724112, "one_sigma_down": 0.09694851881151118, "one_sigma_up": 0.09754290728280562, "win_rate": 0.43875, "stronger_than_next_confidence": 0.9087330508474576, "implied_win_probability": 0.5478043820331442}, {"model_id": "cyberagent/open-calm-7b", "short_name": "open-calm-7b", "median": -0.06256778701634562, "one_sigma_down": 0.09747332104552651, "one_sigma_up": 0.09736005425987745, "win_rate": 0.515, "stronger_than_next_confidence": 0.9867192796610169, "implied_win_probability": 0.5795546717417311}, {"model_id": "gpt-3.5-turbo-0301:20230614", "short_name": "gpt-3.5", "median": 2.4877740910918504, "one_sigma_down": 0.17804337562614858, "one_sigma_up": 0.188354483423951, "win_rate": 0.94375, "stronger_than_next_confidence": 1.0, "implied_win_probability": 0.9275964789568163}]} | ||
{"date": "2023-07-14T15:20:53.234820", "model_metadata": {"gpt-4:20230713": {"model_id": "gpt-4:20230713", "path": "answers/rakuda_v1/gpt4.jsonl"}, "gpt-3.5-turbo-0301:20230614": {"model_id": "gpt-3.5-turbo-0301:20230614", "path": "answers/rakuda_v1/gpt3.jsonl"}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-ppo", "path": "answers/rakuda_v1/rinna-ppo.jsonl"}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "path": "answers/rakuda_v1/rinna-sft.jsonl"}, "rinna/japanese-gpt-neox-3.6b": {"model_id": "rinna/japanese-gpt-neox-3.6b", "path": "answers/rakuda_v1/rinna.jsonl"}, "izumi-lab/stormy-7b-10ep": {"model_id": "izumi-lab/stormy-7b-10ep", "path": "answers/rakuda_v1/stormy.jsonl"}, "cyberagent/open-calm-7b": {"model_id": "cyberagent/open-calm-7b", "path": "answers/rakuda_v1/calm.jsonl"}, "rwkv-world-jpn-55": {"model_id": "rwkv-world-jpn-55", "path": "answers/rakuda_v1/rwkv.jsonl"}, "super-torin-sama-alpha2": {"model_id": "super-torin-sama-alpha2", "path": "answers/rakuda_v1/super-torin.jsonl"}}, "metadata": {"questions_path": "questions/rakuda_v1.jsonl", "reviewer_path": "prompts/gpt4_reviewer.jsonl", "prompt_path": "prompts/rakuda_prompt.jsonl"}, "ranking": [{"model_id": "rinna/japanese-gpt-neox-3.6b", "median": 698.7910267541656, "one_sigma_up": 36.39529035193573, "one_sigma_down": 37.09254466604614, "stronger_than_next_confidence": -1.0, "win_rate": 0.1657754010695187, "short_name": "rinna-3.6b"}, {"model_id": "cyberagent/open-calm-7b", "median": 791.3143222750169, "one_sigma_up": 32.78265513124438, "one_sigma_down": 33.679762198974345, "stronger_than_next_confidence": 0.9715525, "win_rate": 0.27325581395348836, "short_name": "open-calm-7b"}, {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "median": 810.8620649003119, "one_sigma_up": 33.40152463323159, "one_sigma_down": 34.14239430217083, "stronger_than_next_confidence": 0.6575575, "win_rate": 0.29874213836477986, "short_name": "rinna-3.6b-SFT"}, {"model_id": "izumi-lab/stormy-7b-10ep", "median": 845.2533406403094, "one_sigma_up": 32.50929823949912, "one_sigma_down": 33.47132870793223, "stronger_than_next_confidence": 0.7662375, "win_rate": 0.3282208588957055, "short_name": "stormy-7b"}, {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-ppo", "median": 858.878798034552, "one_sigma_up": 33.66522340270626, "one_sigma_down": 32.78757766127194, "stronger_than_next_confidence": 0.61937, "win_rate": 0.38741721854304634, "short_name": "rinna-3.6b-PPO"}, {"model_id": "super-torin-sama-alpha2", "median": 1011.0492399694623, "one_sigma_up": 31.882177939431926, "one_sigma_down": 31.22267038079849, "stronger_than_next_confidence": 0.99954, "win_rate": 0.5706214689265536, "short_name": "supertrin"}, {"model_id": "rwkv-world-jpn-55", "median": 1076.450569713049, "one_sigma_up": 34.04521187391629, "one_sigma_down": 32.972197059167456, "stronger_than_next_confidence": 0.9119575, "win_rate": 0.5338983050847458, "short_name": "rwkv-world"}, {"model_id": "gpt-3.5-turbo-0301:20230614", "median": 1384.6403103005816, "one_sigma_up": 44.63811984801714, "one_sigma_down": 43.16825198495212, "stronger_than_next_confidence": 1.0, "win_rate": 0.8442211055276382, "short_name": "gpt-3.5"}, {"model_id": "gpt-4:20230713", "median": 1520.9151737171537, "one_sigma_up": 54.08307464054769, "one_sigma_down": 49.87973981999494, "stronger_than_next_confidence": 0.992815, "win_rate": 0.9255813953488372, "short_name": "gpt-4"}]} | ||
{"date": "2023-08-10T16:56:13.527072", "model_metadata": {"gpt-4:20230713": {"model_id": "gpt-4:20230713", "path": "answers/rakuda_v1/gpt4.jsonl"}, "gpt-3.5-turbo-0301:20230614": {"model_id": "gpt-3.5-turbo-0301:20230614", "path": "answers/rakuda_v1/gpt3.jsonl"}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-ppo", "path": "answers/rakuda_v1/rinna-ppo.jsonl"}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "path": "answers/rakuda_v1/rinna-sft.jsonl"}, "izumi-lab/stormy-7b-10ep": {"model_id": "izumi-lab/stormy-7b-10ep", "path": "answers/rakuda_v1/stormy.jsonl"}, "cyberagent/open-calm-7b": {"model_id": "cyberagent/open-calm-7b", "path": "answers/rakuda_v1/calm.jsonl"}, "rwkv-world-jp-v1": {"model_id": "rwkv-world-jp-v1", "path": "answers/rakuda_v1/rwkv-jp-v1.jsonl"}, "stabilityai/StableBeluga2": {"model_id": "stabilityai/StableBeluga2", "path": "answers/rakuda_v1/stablebeluga2.jsonl"}, "super-torin-sama-alpha2": {"model_id": "super-torin-sama-alpha2", "path": "answers/rakuda_v1/super-trin.jsonl"}, "stabilityai/japanese-stablelm-instruct-alpha-7b": {"model_id": "stabilityai/japanese-stablelm-instruct-alpha-7b", "path": "answers/rakuda_v1/japanese-stablelm-instruct-alpha.jsonl"}}, "metadata": {"questions_path": "questions/rakuda_v1.jsonl", "reviewer_path": "prompts/gpt4_reviewer.jsonl", "prompt_path": "prompts/rakuda_prompt.jsonl"}, "ranking": [{"model_id": "cyberagent/open-calm-7b", "median": 726.0678932946905, "one_sigma_up": 36.64384759455004, "one_sigma_down": 37.18008464306649, "stronger_than_next_confidence": -1.0, "win_rate": 0.22865853658536586, "short_name": "opencalm-7b"}, {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "median": 727.8284469389451, "one_sigma_up": 35.825174287253844, "one_sigma_down": 37.24934582245612, "stronger_than_next_confidence": 0.5125671052631579, "win_rate": 0.2484076433121019, "short_name": "rinna-3.6b (SFT)"}, {"model_id": "izumi-lab/stormy-7b-10ep", "median": 732.6940609597991, "one_sigma_up": 35.399153231225, "one_sigma_down": 36.202758306949704, "stronger_than_next_confidence": 0.5412184210526316, "win_rate": 0.23053892215568864, "short_name": "opencalm-7b (stormy)"}, {"model_id": "rinna/japanese-gpt-neox-3.6b-instruction-ppo", "median": 762.8027865582023, "one_sigma_up": 35.5077764917022, "one_sigma_down": 35.80021721707169, "stronger_than_next_confidence": 0.7356473684210526, "win_rate": 0.29545454545454547, "short_name": "rinna-3.6b (PPO)"}, {"model_id": "stabilityai/japanese-stablelm-instruct-alpha-7b", "median": 903.4740581593669, "one_sigma_up": 36.07498449159061, "one_sigma_down": 36.53829058803535, "stronger_than_next_confidence": 0.9966828947368421, "win_rate": 0.39338235294117646, "short_name": "ja-stablelm-7b (instruct-alpha)"}, {"model_id": "super-torin-sama-alpha2", "median": 956.012156053497, "one_sigma_up": 33.111580632497294, "one_sigma_down": 32.790184020581705, "stronger_than_next_confidence": 0.8535013157894736, "win_rate": 0.5171428571428571, "short_name": "supertrin"}, {"model_id": "rwkv-world-jp-v1", "median": 1075.3603532210454, "one_sigma_up": 37.17049873102087, "one_sigma_down": 36.553948098262936, "stronger_than_next_confidence": 0.9919868421052631, "win_rate": 0.5912408759124088, "short_name": "rwkv-world-7b (jp-v1)"}, {"model_id": "stabilityai/StableBeluga2", "median": 1172.6439694450753, "one_sigma_up": 39.602436920521086, "one_sigma_down": 39.18572361829388, "stronger_than_next_confidence": 0.97745, "win_rate": 0.7007299270072993, "short_name": "llama2-70b (StableBeluga2)"}, {"model_id": "gpt-3.5-turbo-0301:20230614", "median": 1391.4667363384747, "one_sigma_up": 50.58701644193843, "one_sigma_down": 47.048414116849926, "stronger_than_next_confidence": 0.9997618421052632, "win_rate": 0.8387096774193549, "short_name": "gpt-3.5"}, {"model_id": "gpt-4:20230713", "median": 1550.06316459277, "one_sigma_up": 61.2801855730097, "one_sigma_down": 58.41700819772973, "stronger_than_next_confidence": 0.995825, "win_rate": 0.9233128834355828, "short_name": "gpt-4"}]} |