rounding

yuzu-ai · Aug 11, 2023 · 262aab0
1 parent a51af26
commit 262aab0
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 11 deletions.
diff --git a/scripts/elo_benchmark.py b/scripts/elo_benchmark.py
@@ -39,7 +39,7 @@ def convert_to_markdown(json_file, strength_fig_file, template_file, markdown_fi
     table = "| Rank | Model | Strength | Stronger than the next model at confidence level  | \n| :--- | :---: | :---: | :---: |\n"
     for i, rank in enumerate(rankings):
         # assert(round(rank['one_sigma_up'],2) == round(rank['one_sigma_down'],2))
-        table += f"| {i+1} | {make_clickable_model(rank['model_id'])} | {rank['median']:.3f} ± {rank['one_sigma_up']:.2f} | { str(round(rank['stronger_than_next_confidence']*100,1))+'%' if rank['stronger_than_next_confidence']>0 else 'N/A'}\n"
+        table += f"| {i+1} | {make_clickable_model(rank['model_id'])} | {rank['median']:.0f} ± {rank['one_sigma_up']:.0f} | { str(round(rank['stronger_than_next_confidence']*100,1))+'%' if rank['stronger_than_next_confidence']>0 else 'N/A'}\n"
 
     with open(template_file, "r") as f:
         template = f.read()

diff --git a/src/content/pages/benchmark.md b/src/content/pages/benchmark.md
@@ -13,16 +13,16 @@ Please contact us if you have any suggestions or requests for models that you'd
 
 | Rank | Model | Strength | Stronger than the next model at confidence level  | 
 | :--- | :---: | :---: | :---: |
-| 1 | <a target="_blank" href="https://openai.com/" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>openai/GPT-4</a> | 1550.063 ± 61.28 | 99.6%
-| 2 | <a target="_blank" href="https://openai.com/" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>openai/GPT-3.5</a> | 1391.467 ± 50.59 | 100.0%
-| 3 | <a target="_blank" href="https://huggingface.co/stabilityai/StableBeluga2" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>stabilityai/StableBeluga2</a> | 1172.644 ± 39.60 | 97.7%
-| 4 | <a target="_blank" href="https://huggingface.co/BlinkDL/rwkv-4-world" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>blinkdl/rwkv-world-7b-jp-v1</a> | 1075.360 ± 37.17 | 99.2%
-| 5 | <a target="_blank" href="https://ai-novel.com/index.php" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>ainovelist/supertrin</a> | 956.012 ± 33.11 | 85.4%
-| 6 | <a target="_blank" href="https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>stabilityai/japanese-stablelm-instruct-alpha-7b</a> | 903.474 ± 36.07 | 99.7%
-| 7 | <a target="_blank" href="https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-ppo" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>rinna/japanese-gpt-neox-3.6b-instruction-ppo</a> | 762.803 ± 35.51 | 73.6%
-| 8 | <a target="_blank" href="https://huggingface.co/izumi-lab/stormy-7b-10ep" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>izumi-lab/stormy-7b-10ep</a> | 732.694 ± 35.40 | 54.1%
-| 9 | <a target="_blank" href="https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft-v2" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>rinna/japanese-gpt-neox-3.6b-instruction-sft-v2</a> | 727.828 ± 35.83 | 51.3%
-| 10 | <a target="_blank" href="https://huggingface.co/cyberagent/open-calm-7b" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>cyberagent/open-calm-7b</a> | 726.068 ± 36.64 | N/A
+| 1 | <a target="_blank" href="https://openai.com/" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>openai/GPT-4</a> | 1550 ± 61 | 99.6%
+| 2 | <a target="_blank" href="https://openai.com/" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>openai/GPT-3.5</a> | 1391 ± 51 | 100.0%
+| 3 | <a target="_blank" href="https://huggingface.co/stabilityai/StableBeluga2" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>stabilityai/StableBeluga2</a> | 1173 ± 40 | 97.7%
+| 4 | <a target="_blank" href="https://huggingface.co/BlinkDL/rwkv-4-world" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>blinkdl/rwkv-world-7b-jp-v1</a> | 1075 ± 37 | 99.2%
+| 5 | <a target="_blank" href="https://ai-novel.com/index.php" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>ainovelist/supertrin</a> | 956 ± 33 | 85.4%
+| 6 | <a target="_blank" href="https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>stabilityai/japanese-stablelm-instruct-alpha-7b</a> | 903 ± 36 | 99.7%
+| 7 | <a target="_blank" href="https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-ppo" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>rinna/japanese-gpt-neox-3.6b-instruction-ppo</a> | 763 ± 36 | 73.6%
+| 8 | <a target="_blank" href="https://huggingface.co/izumi-lab/stormy-7b-10ep" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>izumi-lab/stormy-7b-10ep</a> | 733 ± 35 | 54.1%
+| 9 | <a target="_blank" href="https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft-v2" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>rinna/japanese-gpt-neox-3.6b-instruction-sft-v2</a> | 728 ± 36 | 51.3%
+| 10 | <a target="_blank" href="https://huggingface.co/cyberagent/open-calm-7b" style={{color: "var(--link-text-color)", textDecoration: "underline",textDecorationStyle: "dotted"}}>cyberagent/open-calm-7b</a> | 726 ± 37 | N/A
 
 
 ![Bradley-Terry strengths of AI assistants on the Rakuda benchmark](/images/charts/rakuda_v1_8-10ranking.png)