
Commit

Compatibility fixes
Thilina Rajapakse committed May 29, 2024
1 parent f89bc1b commit 919f69d
Showing 9 changed files with 161 additions and 59 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.70.1] - 2024-02-15
+
+### Fixed
+
+- Fixed compatibility issues with transformers >= 4.41.0
+
+### Changed
+
+- QoL changes for MonoT5 style models
+
## [0.70.0] - 2024-02-15

### Added
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@

setup(
    name="simpletransformers",
-    version="0.70.0",
+    version="0.70.1",
    author="Thilina Rajapakse",
    author_email="[email protected]",
    description="An easy-to-use wrapper library for the Transformers library.",
2 changes: 1 addition & 1 deletion simpletransformers/classification/classification_model.py
@@ -1926,7 +1926,7 @@ def compute_metrics(

        mcc = matthews_corrcoef(labels, preds)
        accuracy = accuracy_score(labels, preds)
-        f1 = f1_score(labels, preds)
+        f1 = f1_score(labels, preds, average="macro")
        if self.model.num_labels == 2:
            tn, fp, fn, tp = confusion_matrix(labels, preds, labels=[0, 1]).ravel()
            if self.args.sliding_window:
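Note on the f1_score change above: scikit-learn's f1_score defaults to average="binary", which raises an error when the labels contain more than two classes, so average="macro" keeps compute_metrics usable for multiclass tasks (for binary tasks the reported value becomes a macro average rather than the positive-class F1). A minimal sketch with made-up labels:

from sklearn.metrics import f1_score

labels = [0, 2, 1, 2, 0]
preds = [0, 1, 1, 2, 0]

# average="macro" is the unweighted mean of per-class F1 scores and
# works for both binary and multiclass label sets.
print(f1_score(labels, preds, average="macro"))  # ≈ 0.78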
@@ -38,7 +38,6 @@
from torch.optim import AdamW
from transformers.optimization import Adafactor
from transformers import (
-    BERT_PRETRAINED_MODEL_ARCHIVE_LIST,
    WEIGHTS_NAME,
    BertConfig,
    BertModel,
@@ -178,24 +177,6 @@ def __init__(
self.config, self.transformer, self.img_encoder
)

-        if model_name not in BERT_PRETRAINED_MODEL_ARCHIVE_LIST:
-            try:
-                self.model.load_state_dict(
-                    torch.load(os.path.join(model_name, "pytorch_model.bin"))
-                )
-            except EnvironmentError:
-                msg = (
-                    "Model name '{}' was not found in model name list ({}). "
-                    "We assumed '{}' was a path or url to model weight files named one of {} but "
-                    "couldn't find any such file at this path or url.".format(
-                        model_name,
-                        ", ".join(BERT_PRETRAINED_MODEL_ARCHIVE_LIST),
-                        model_name,
-                        "pytorch_model.bin",
-                    )
-                )
-                raise EnvironmentError(msg)
-
        self.tokenizer = tokenizer_class.from_pretrained(
            model_name, do_lower_case=self.args.do_lower_case, **kwargs
        )
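Context for the block removed above: its only purpose was to tell Hub model names apart from local paths using BERT_PRETRAINED_MODEL_ARCHIVE_LIST, a constant that transformers >= 4.41.0 no longer exports, so the import alone breaks the module. The from_pretrained API resolves Hub IDs and local directories by itself, which is presumably why the fallback can be dropped without a replacement. A rough illustration (model name and path are examples, not from this commit):

from transformers import BertModel

# A Hub model ID and a local checkpoint directory go through the same call.
model = BertModel.from_pretrained("bert-base-uncased")
# model = BertModel.from_pretrained("/path/to/local/checkpoint")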
1 change: 1 addition & 0 deletions simpletransformers/config/model_args.py
@@ -224,6 +224,7 @@ class T5Args(ModelArgs):

    model_class: str = "T5Model"
    add_prefix: bool = True
+    as_reranker: bool = False
    dataset_class: Dataset = None
    do_sample: bool = False
    early_stopping: bool = True
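The new as_reranker flag ties in with the MonoT5 QoL changes listed in the changelog. A minimal sketch of how it might be set, assuming the usual T5Model/T5Args workflow (the checkpoint name is an example, and the exact reranking behaviour depends on the rest of this commit):

from simpletransformers.t5 import T5Args, T5Model

model_args = T5Args()
model_args.as_reranker = True  # new flag added in this commit

# "castorini/monot5-base-msmarco" is an illustrative MonoT5-style checkpoint.
model = T5Model("t5", "castorini/monot5-base-msmarco", args=model_args, use_cuda=False)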
19 changes: 0 additions & 19 deletions simpletransformers/custom_models/models.py
@@ -34,15 +34,8 @@
    BigBirdPreTrainedModel,
)
from transformers.models.camembert.configuration_camembert import CamembertConfig
-from transformers.models.camembert.modeling_camembert import (
-    CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
-)
from transformers.models.distilbert.configuration_distilbert import DistilBertConfig
-from transformers.models.distilbert.modeling_distilbert import (
-    DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
-)
from transformers.models.electra.modeling_electra import (
-    ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST,
    ElectraConfig,
    ElectraModel,
    ElectraPreTrainedModel,
@@ -59,17 +52,13 @@
from transformers.models.rembert.configuration_rembert import RemBertConfig
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import (
-    ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
    RobertaClassificationHead,
    RobertaForQuestionAnswering,
    RobertaPreTrainedModel,
    RobertaLMHead,
    MaskedLMOutput,
)
from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig
-from transformers.models.xlm_roberta.modeling_xlm_roberta import (
-    XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
-)
from simpletransformers.custom_models.retrieval_autoencoder import Autoencoder


@@ -237,7 +226,6 @@ class RobertaForMultiLabelSequenceClassification(BertPreTrainedModel):
"""

config_class = RobertaConfig
-pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST
base_model_prefix = "roberta"

def __init__(self, config, pos_weight=None):
@@ -304,7 +292,6 @@ class CamembertForMultiLabelSequenceClassification(
"""

config_class = CamembertConfig
-pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST
base_model_prefix = "camembert"


@@ -430,7 +417,6 @@ class DistilBertPreTrainedModel(PreTrainedModel):
"""

config_class = DistilBertConfig
-pretrained_model_archive_map = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST
load_tf_weights = None
base_model_prefix = "distilbert"

@@ -664,7 +650,6 @@ class XLMRobertaForMultiLabelSequenceClassification(
RobertaForMultiLabelSequenceClassification
):
config_class = XLMRobertaConfig
-pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST


class ElectraPooler(nn.Module):
@@ -773,7 +758,6 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
""" # noqa
config_class = ElectraConfig
-pretrained_model_archive_map = ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST
base_model_prefix = "electra"

def __init__(self, config, weight=None):
@@ -829,7 +813,6 @@ class ElectraForMultiLabelSequenceClassification(ElectraPreTrainedModel):
"""

config_class = ElectraConfig
-pretrained_model_archive_map = ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST
base_model_prefix = "electra"

def __init__(self, config, pos_weight=None):
@@ -883,7 +866,6 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel):
"""

config_class = ElectraConfig
-pretrained_model_archive_map = ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST
base_model_prefix = "electra"

def __init__(self, config, weight=None):
@@ -949,7 +931,6 @@ def forward(

class XLMRobertaForQuestionAnswering(RobertaForQuestionAnswering):
config_class = XLMRobertaConfig
-pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST


class BigBirdForMultiLabelSequenceClassification(BigBirdPreTrainedModel):
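Background on this file's deletions: transformers >= 4.41.0 removed the *_PRETRAINED_MODEL_ARCHIVE_LIST constants, so importing them fails as soon as the module loads, and the pretrained_model_archive_map class attribute is no longer consumed by PreTrainedModel loading either. The commit drops both outright; a version-guarded import would have been an alternative, sketched here purely for illustration (not what the commit does):

try:
    from transformers.models.roberta.modeling_roberta import (
        ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
    )
except ImportError:
    # transformers >= 4.41.0 no longer ships the archive-list constants.
    ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = []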
@@ -910,7 +910,7 @@ def collate(examples: List[torch.Tensor]):
if args.fp16:
with amp.autocast():
if args.model_type == "longformer":
-outputs = model(inputs, attention_mask=None, labels=labels)
+outputs = model(inputs, labels=labels)
elif args.model_type == "electra":
outputs = model(
inputs,
@@ -933,9 +933,7 @@
loss = outputs[0]
else:
if args.model_type == "longformer":
-outputs = model(
-    **inputs_dict, attention_mask=None, labels=labels
-)
+outputs = model(**inputs_dict, labels=labels)
elif args.model_type == "electra":
outputs = model(
input_ids,
