Fix failed import of ST RoBERTa models (#637)

Fixes an error when uploading the sentence-transformers/all-distilroberta-v1 model, which failed with "missing 2 required positional arguments: 'token_type_ids' and 'position_ids'". The cause was that the tokenizer type was not recognised due to a typo: the tuple of tokenizer classes listed transformers.RobertaConfig where transformers.RobertaTokenizer was intended.
elastic · Nov 21, 2023 · 081250c · 081250c
1 parent af26897
commit 081250c
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 1 deletion.
diff --git a/eland/ml/pytorch/transformers.py b/eland/ml/pytorch/transformers.py
@@ -311,7 +311,7 @@ def from_pretrained(
             (
                 transformers.BartTokenizer,
                 transformers.MPNetTokenizer,
-                transformers.RobertaConfig,
+                transformers.RobertaTokenizer,
                 transformers.XLMRobertaTokenizer,
             ),
         ):

diff --git a/tests/ml/pytorch/test_pytorch_model_config_pytest.py b/tests/ml/pytorch/test_pytorch_model_config_pytest.py
@@ -77,6 +77,14 @@
 # have been imported
 if HAS_PYTORCH and HAS_SKLEARN and HAS_TRANSFORMERS:
     MODEL_CONFIGURATIONS = [
+        (
+            "sentence-transformers/all-distilroberta-v1",
+            "text_embedding",
+            TextEmbeddingInferenceOptions,
+            NlpRobertaTokenizationConfig,
+            512,
+            768,
+        ),
         (
             "intfloat/multilingual-e5-small",
             "text_embedding",