gpt_big_code: make flash attention impl quantization friendly #1282

Merged
9 changes: 5 additions & 4 deletions optimum/habana/transformers/modeling_utils.py

@@ -55,6 +55,7 @@
     GaudiGPT2Block,
     GaudiGPT2DoubleHeadsModel,
     GaudiGPT2LMHeadModel,
+    GaudiGPTBigCodeAttention,
     GaudiGPTBigCodeForCausalLM,
     GaudiGPTJAttention,
     GaudiGPTJBlock,
@@ -148,7 +149,6 @@
     gaudi_generate_speech,
     gaudi_get_extended_attention_mask,
     gaudi_gpt2_forward,
-    gaudi_gpt_bigcode_attention_forward,
     gaudi_gpt_bigcode_block_forward,
     gaudi_gpt_bigcode_model_forward,
     gaudi_gpt_neox_attention_forward,
@@ -356,12 +356,13 @@ def adapt_transformers_to_gaudi():
     transformers.models.gptj.modeling_gptj.GPTJModel = GaudiGPTJModel

     # Optimization for GPTBigCode on Gaudi
-    transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeAttention.forward = (
-        gaudi_gpt_bigcode_attention_forward
-    )
+    transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeAttention = GaudiGPTBigCodeAttention
     transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM = GaudiGPTBigCodeForCausalLM
     transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeBlock.forward = gaudi_gpt_bigcode_block_forward
     transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeModel.forward = gaudi_gpt_bigcode_model_forward
+    transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBIGCODE_ATTENTION_CLASSES.update(
+        {"eager": GaudiGPTBigCodeAttention}
+    )

     # Optimization for gpt-neox generation on Gaudi
     transformers.models.gpt_neox.modeling_gpt_neox.GPTNeoXForCausalLM = GaudiGPTNeoXForCausalLM
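
The class-level swap above matters because upstream transformers builds each block's attention layer from the GPTBIGCODE_ATTENTION_CLASSES mapping, keyed by the configured attention implementation. The sketch below (simplified for illustration, not the upstream code verbatim) shows why registering GaudiGPTBigCodeAttention under "eager" makes every freshly instantiated attention module the Gaudi class, which per-module quantization tooling can then match on:

# Simplified sketch of how the attention class is chosen when a
# GPTBigCodeBlock is constructed; names mirror upstream transformers,
# but this is an illustration, not the library code.
from optimum.habana.transformers.models import GaudiGPTBigCodeAttention

GPTBIGCODE_ATTENTION_CLASSES = {
    # adapt_transformers_to_gaudi() updates the real mapping like this:
    "eager": GaudiGPTBigCodeAttention,
}

def build_attention(config, layer_idx):
    # config._attn_implementation defaults to "eager", so the Gaudi class
    # is instantiated for every layer of the model.
    attn_cls = GPTBIGCODE_ATTENTION_CLASSES[config._attn_implementation]
    return attn_cls(config, layer_idx=layer_idx)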
2 changes: 1 addition & 1 deletion optimum/habana/transformers/models/__init__.py

@@ -79,8 +79,8 @@
     gaudi_gpt2_forward,
 )
 from .gpt_bigcode import (
+    GaudiGPTBigCodeAttention,
     GaudiGPTBigCodeForCausalLM,
-    gaudi_gpt_bigcode_attention_forward,
     gaudi_gpt_bigcode_block_forward,
     gaudi_gpt_bigcode_model_forward,
 )
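
With the whole attention class re-exported from optimum.habana.transformers.models (instead of only a forward function), calibration and quantization flows can locate the attention layers by class. A hypothetical helper along those lines, purely as a sketch:

from optimum.habana.transformers.models import GaudiGPTBigCodeAttention

def iter_attention_modules(model):
    # Yield (name, module) pairs for every GPTBigCode attention layer in the
    # adapted model; per-module quantization wrappers typically walk the model
    # this way to decide which modules to observe or patch.
    for name, module in model.named_modules():
        if isinstance(module, GaudiGPTBigCodeAttention):
            yield name, module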
2 changes: 1 addition & 1 deletion optimum/habana/transformers/models/gpt_bigcode/__init__.py

@@ -1,6 +1,6 @@
 from .modeling_gpt_bigcode import (
+    GaudiGPTBigCodeAttention,
     GaudiGPTBigCodeForCausalLM,
-    gaudi_gpt_bigcode_attention_forward,
     gaudi_gpt_bigcode_block_forward,
     gaudi_gpt_bigcode_model_forward,
 )
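
A quick end-to-end check of the new wiring, as a sketch: it assumes a Gaudi machine with optimum-habana installed and uses bigcode/starcoderbase-1b only as an example GPTBigCode checkpoint.

from transformers import AutoModelForCausalLM

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
from optimum.habana.transformers.models import GaudiGPTBigCodeAttention

# Swap the upstream GPTBigCode classes for their Gaudi counterparts.
adapt_transformers_to_gaudi()

model = AutoModelForCausalLM.from_pretrained("bigcode/starcoderbase-1b")

# Every block's attention should now be the full Gaudi class, so the flash
# attention path lives in a module that quantization tooling can see.
assert all(isinstance(block.attn, GaudiGPTBigCodeAttention) for block in model.transformer.h)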