Add Precision
apaniukov committed Dec 7, 2023
1 parent 9a14677 commit 6c4ec13
Showing 3 changed files with 56 additions and 55 deletions.
2 changes: 1 addition & 1 deletion modules/custom_operations/pyproject.toml
@@ -27,7 +27,7 @@ dev = [
"pytest_harvest"
]
transformers = [
-"transformers[sentencepiece,tiktoken]"
+"transformers[sentencepiece]"
]
tiktoken = [
"tiktoken"
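Note on the dependency change above: the `transformers` extra no longer asks Hugging Face transformers for its tiktoken support; tiktoken is supplied only by this package's own `tiktoken` extra (installed with something like `pip install .[tiktoken]`). A minimal sketch, assuming plain pytest, of how an optional dependency like this is typically guarded in tests; the test body and encoding name are illustrative, not taken from the repo:

    import pytest

    # Skip the whole module cleanly when the optional "tiktoken" extra is
    # not installed, instead of crashing with an ImportError at collection.
    tiktoken = pytest.importorskip("tiktoken")


    def test_encode_decode_roundtrip():
        # "cl100k_base" is one of tiktoken's built-in encodings.
        enc = tiktoken.get_encoding("cl100k_base")
        assert enc.decode(enc.encode("hello world")) == "hello world"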
@@ -199,22 +199,22 @@ This report is autogenerated and includes tokenizers and detokenizers tests. To
<tbody>
<tr>
<td >BPE</td>
-<td >92.941176</td>
+<td >92.94</td>
<td >850</td>
</tr>
<tr>
<td >SentencePiece</td>
-<td >62.000000</td>
+<td >62.00</td>
<td >800</td>
</tr>
<tr>
<td >Tiktoken</td>
-<td >95.000000</td>
+<td >95.00</td>
<td >100</td>
</tr>
<tr>
<td >WordPiece</td>
-<td >99.204244</td>
+<td >99.20</td>
<td >377</td>
</tr>
</tbody>
@@ -235,289 +235,289 @@ This report is autogenerated and includes tokenizers and detokenizers tests. To
<tr>
<td >BPE</td>
<td >EleutherAI/gpt-j-6b</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >EleutherAI/gpt-neo-125m</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >EleutherAI/gpt-neox-20b</td>
-<td >92.000000</td>
+<td >92.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >EleutherAI/pythia-12b-deduped</td>
-<td >92.000000</td>
+<td >92.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >KoboldAI/fairseq-dense-13B</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >Salesforce/codegen-16B-multi</td>
-<td >92.000000</td>
+<td >92.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >ai-forever/rugpt3large_based_on_gpt2</td>
-<td >94.000000</td>
+<td >94.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >bigscience/bloom</td>
-<td >98.000000</td>
+<td >98.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >facebook/bart-large-mnli</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >facebook/galactica-120b</td>
-<td >94.000000</td>
+<td >94.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >facebook/opt-66b</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >gpt2</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</td>
-<td >60.000000</td>
+<td >60.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >microsoft/deberta-base</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >roberta-base</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >sentence-transformers/all-roberta-large-v1</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >BPE</td>
<td >stabilityai/stablecode-completion-alpha-3b-4k</td>
-<td >94.000000</td>
+<td >94.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >NousResearch/Llama-2-13b-hf</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >NousResearch/Llama-2-13b-hf_slow</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >THUDM/chatglm2-6b</td>
-<td >50.000000</td>
+<td >50.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >THUDM/chatglm2-6b_slow</td>
-<td >50.000000</td>
+<td >50.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >THUDM/chatglm3-6b</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >THUDM/chatglm3-6b_slow</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >camembert-base</td>
-<td >26.000000</td>
+<td >26.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >camembert-base_slow</td>
-<td >26.000000</td>
+<td >26.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >codellama/CodeLlama-7b-hf</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >codellama/CodeLlama-7b-hf_slow</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >microsoft/deberta-v3-base</td>
-<td >94.000000</td>
+<td >94.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >microsoft/deberta-v3-base_slow</td>
-<td >98.000000</td>
+<td >98.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >xlm-roberta-base</td>
-<td >0.000000</td>
+<td >0.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >xlm-roberta-base_slow</td>
-<td >0.000000</td>
+<td >0.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >xlnet-base-cased</td>
-<td >24.000000</td>
+<td >24.00</td>
<td >50</td>
</tr>
<tr>
<td >SentencePiece</td>
<td >xlnet-base-cased_slow</td>
-<td >24.000000</td>
+<td >24.00</td>
<td >50</td>
</tr>
<tr>
<td >Tiktoken</td>
<td >Qwen/Qwen-14B-Chat</td>
-<td >96.000000</td>
+<td >96.00</td>
<td >50</td>
</tr>
<tr>
<td >Tiktoken</td>
<td >Salesforce/xgen-7b-8k-base</td>
-<td >94.000000</td>
+<td >94.00</td>
<td >50</td>
</tr>
<tr>
<td >WordPiece</td>
<td >ProsusAI/finbert</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >bert-base-multilingual-cased</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >bert-large-cased</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >cointegrated/rubert-tiny2</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >distilbert-base-uncased-finetuned-sst-2-english</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >google/electra-base-discriminator</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >google/mobilebert-uncased</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >jhgan/ko-sbert-sts</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >prajjwal1/bert-mini</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >rajiv003/ernie-finetuned-qqp</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >rasa/LaBSE</td>
-<td >89.655172</td>
+<td >89.66</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >sentence-transformers/all-MiniLM-L6-v2</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
<tr>
<td >WordPiece</td>
<td >squeezebert/squeezebert-uncased</td>
-<td >100.000000</td>
+<td >100.00</td>
<td >29</td>
</tr>
</tbody>
@@ -10,6 +10,7 @@
def pytest_addoption(parser):
    parser.addoption("--update_readme", help="Update test coverage report in README.md")

+
PASS_RATES_FILE = Path(__file__).parent / "pass_rates.json"


@@ -58,9 +59,9 @@ def add_tokenizer_type(row):
        "To update it run pytest with `--update_readme` flag.\n\n"
        "### Coverage by Tokenizer Type\n\n"
    )
-    grouped_by_type.style.hide_index().to_html(new_readme, exclude_styles=True)
+    grouped_by_type.style.format(precision=2).hide_index().to_html(new_readme, exclude_styles=True)
    new_readme.write("\n### Coverage by Model Type\n\n")
-    grouped_by_model.style.hide_index().to_html(new_readme, exclude_styles=True)
+    grouped_by_model.style.format(precision=2).hide_index().to_html(new_readme, exclude_styles=True)

    with open(readme_path, "w") as f:
        f.write(new_readme.getvalue())
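The conftest change above is what produced every README edit in this commit: pandas' `Styler.format(precision=2)` rounds floats to two decimals in the rendered HTML while leaving the underlying DataFrame untouched. A minimal, self-contained sketch of the same call chain; the column names and values are illustrative, and `hide_index()` matches the repo code but is deprecated in newer pandas (1.4+) in favour of `hide(axis="index")`:

    import pandas as pd

    # Data shaped like the coverage report (values are illustrative).
    df = pd.DataFrame({
        "Tokenizer Type": ["BPE", "WordPiece"],
        "Pass Rate, %": [92.941176, 99.204244],
        "Number of Tests": [850, 377],
    })

    # format(precision=2) affects only the rendered output: 92.941176 is
    # written as 92.94, but df still stores the full-precision float.
    html = df.style.format(precision=2).hide_index().to_html(exclude_styles=True)
    print(html)  # emits plain <table> markup, as in the README diff above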
