Optimized inference of XGLM model on HPU
Signed-off-by: Ye, Xinyu <[email protected]>
XinyuYe-Intel committed Sep 10, 2024
1 parent 8570453 commit 2f78787
Showing 5 changed files with 543 additions and 0 deletions.
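
For reference, a minimal usage sketch of what this commit enables; it is not part of the change itself. It assumes optimum-habana (including this commit) and the Habana PyTorch bridge are installed, that an HPU device is available, and it uses the public facebook/xglm-564M checkpoint as an example; in practice the text-generation example script shipped with optimum-habana is the supported entry point.

import torch
import habana_frameworks.torch.core as htcore  # registers the "hpu" device (Habana PyTorch bridge); assumed available
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

# Patch transformers with the Gaudi-optimized implementations,
# including the XGLM ones added by this commit.
adapt_transformers_to_gaudi()

tokenizer = AutoTokenizer.from_pretrained("facebook/xglm-564M")
model = AutoModelForCausalLM.from_pretrained("facebook/xglm-564M", torch_dtype=torch.bfloat16)
model = model.to("hpu")  # move the weights to the Habana accelerator

inputs = tokenizer("Optimum Habana makes XGLM inference", return_tensors="pt").to("hpu")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
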
1 change: 1 addition & 0 deletions optimum/habana/transformers/generation/utils.py
@@ -105,6 +105,7 @@
"stablelm",
"mamba",
"deci",
"xglm",
]
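
For context, the list receiving the "xglm" entry above is the registry of model types for which optimum-habana enables its static-shape generation path in generation/utils.py. The sketch below is illustrative only: the list name reflects optimum-habana's generation utilities but should be read as an assumption, and the helper around it is hypothetical rather than part of this diff.

# Illustrative only: the real list lives near the top of
# optimum/habana/transformers/generation/utils.py; the helper below is a
# hypothetical stand-in for how generation code gates on the model type.
MODELS_OPTIMIZED_WITH_STATIC_SHAPES = [
    "stablelm",
    "mamba",
    "deci",
    "xglm",  # enabled by this commit
]


def uses_static_shapes(model_type: str) -> bool:
    # e.g. uses_static_shapes("xglm") -> True after this change
    return model_type in MODELS_OPTIMIZED_WITH_STATIC_SHAPES
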


10 changes: 10 additions & 0 deletions optimum/habana/transformers/modeling_utils.py
@@ -96,6 +96,7 @@
GaudiStarcoder2DecoderLayer,
GaudiStarcoder2ForCausalLM,
GaudiStarcoder2Model,
GaudiXGLMForCausalLM,
LlamaConfig,
MistralConfig,
MixtralConfig,
@@ -195,6 +196,9 @@
gaudi_wav2vec2_forward,
gaudi_wav2vec2_tdnnlayer_forward,
gaudi_wav2vec2forctc_forward,
gaudi_xglm_attention_forward,
gaudi_xglm_decoder_layer_forward,
gaudi_xglm_model_forward,
)


@@ -561,3 +565,9 @@ def adapt_transformers_to_gaudi():

transformers.AutoConfig.register("deci", DeciLMConfig)
transformers.AutoModelForCausalLM.register(DeciLMConfig, DeciLMForCausalLM)

# Optimization for xglm on Gaudi
transformers.models.xglm.modeling_xglm.XGLMForCausalLM = GaudiXGLMForCausalLM
transformers.models.xglm.modeling_xglm.XGLMModel.forward = gaudi_xglm_model_forward
transformers.models.xglm.modeling_xglm.XGLMAttention.forward = gaudi_xglm_attention_forward
transformers.models.xglm.modeling_xglm.XGLMDecoderLayer.forward = gaudi_xglm_decoder_layer_forward
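
The block above monkey-patches the stock transformers XGLM symbols. A small sanity-check sketch (assuming optimum-habana with this commit is installed) shows how the substitution can be observed after calling adapt_transformers_to_gaudi():

import transformers
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
from optimum.habana.transformers.models import (
    GaudiXGLMForCausalLM,
    gaudi_xglm_attention_forward,
)

adapt_transformers_to_gaudi()

xglm_module = transformers.models.xglm.modeling_xglm
# The causal-LM class is replaced wholesale...
assert xglm_module.XGLMForCausalLM is GaudiXGLMForCausalLM
# ...while XGLMAttention keeps its class and only its forward is swapped for the Gaudi one.
assert xglm_module.XGLMAttention.forward is gaudi_xglm_attention_forward
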
6 changes: 6 additions & 0 deletions optimum/habana/transformers/models/__init__.py
@@ -226,3 +226,9 @@
gaudi_wav2vec2_tdnnlayer_forward,
gaudi_wav2vec2forctc_forward,
)
from .xglm import (
gaudi_xglm_attention_forward,
gaudi_xglm_decoder_layer_forward,
gaudi_xglm_model_forward,
GaudiXGLMForCausalLM,
)
6 changes: 6 additions & 0 deletions optimum/habana/transformers/models/xglm/__init__.py
@@ -0,0 +1,6 @@
from .modeling_xglm import (
gaudi_xglm_attention_forward,
gaudi_xglm_decoder_layer_forward,
gaudi_xglm_model_forward,
GaudiXGLMForCausalLM,
)
520 changes: 520 additions & 0 deletions optimum/habana/transformers/models/xglm/modeling_xglm.py (new file; diff not loaded in this view)
