Skip to content

Commit

Permalink
Update tokenizer unit tests (#919)
Browse files Browse the repository at this point in the history
* Ignore kobert from unit tests (now requires `trust_remote_code=True`)

* Remove tokenizers without chat template
  • Loading branch information
xenova authored Sep 3, 2024
1 parent 50d5620 commit f43d3dd
Showing 1 changed file with 3 additions and 14 deletions.
17 changes: 3 additions & 14 deletions tests/generate_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@

# TODO: remove when https://github.com/huggingface/transformers/issues/28096 is addressed
'RajuKandasamy/tamillama_tiny_30m',

# Requires `trust_remote_code`
'monologg/kobert',
]

MAX_TESTS = {
Expand Down Expand Up @@ -228,10 +231,6 @@

TOKENIZERS_WITH_CHAT_TEMPLATES = {
# https://huggingface.co/docs/transformers/main/en/chat_templating
'Xenova/blenderbot-400M-distill': [
'basic',
],

'Xenova/mistral-tokenizer-v1': [
'basic',
],
Expand All @@ -240,16 +239,6 @@
'system',
],

'Xenova/llama-tokenizer': [
'basic',
'system',
'system + assistant',
],
'Xenova/llama2-tokenizer': [
'basic',
'system',
'system + assistant',
],
'Xenova/llama2-chat-tokenizer': [
'basic',
'system',
Expand Down

0 comments on commit f43d3dd

Please sign in to comment.