Optimized inference of XGLM model on HPU
Signed-off-by: Ye, Xinyu <[email protected]>
XinyuYe-Intel committed Sep 10, 2024
1 parent 8570453 commit 2f78787
Showing 5 changed files with 543 additions and 0 deletions.
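
For reference, a minimal usage sketch of what this commit enables; it is not part of the change itself. It assumes optimum-habana (including this commit) and the Habana PyTorch bridge are installed, that an HPU device is available, and it uses the public facebook/xglm-564M checkpoint as an example; in practice the text-generation example script shipped with optimum-habana is the supported entry point.

import torch
import habana_frameworks.torch.core as htcore  # registers the "hpu" device (Habana PyTorch bridge); assumed available
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

# Patch transformers with the Gaudi-optimized implementations,
# including the XGLM ones added by this commit.
adapt_transformers_to_gaudi()

tokenizer = AutoTokenizer.from_pretrained("facebook/xglm-564M")
model = AutoModelForCausalLM.from_pretrained("facebook/xglm-564M", torch_dtype=torch.bfloat16)
model = model.to("hpu")  # move the weights to the Habana accelerator

inputs = tokenizer("Optimum Habana makes XGLM inference", return_tensors="pt").to("hpu")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
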
1 change: 1 addition & 0 deletions optimum/habana/transformers/generation/utils.py
@@ -105,6 +105,7 @@
"stablelm",
"mamba",
"deci",
"xglm",
]
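
For context, the list receiving the "xglm" entry above is the registry of model types for which optimum-habana enables its static-shape generation path in generation/utils.py. The sketch below is illustrative only: the list name reflects optimum-habana's generation utilities but should be read as an assumption, and the helper around it is hypothetical rather than part of this diff.

# Illustrative only: the real list lives near the top of
# optimum/habana/transformers/generation/utils.py; the helper below is a
# hypothetical stand-in for how generation code gates on the model type.
MODELS_OPTIMIZED_WITH_STATIC_SHAPES = [
    "stablelm",
    "mamba",
    "deci",
    "xglm",  # enabled by this commit
]


def uses_static_shapes(model_type: str) -> bool:
    # e.g. uses_static_shapes("xglm") -> True after this change
    return model_type in MODELS_OPTIMIZED_WITH_STATIC_SHAPES
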


10 changes: 10 additions & 0 deletions optimum/habana/transformers/modeling_utils.py
@@ -96,6 +96,7 @@
GaudiStarcoder2DecoderLayer,
GaudiStarcoder2ForCausalLM,
GaudiStarcoder2Model,
GaudiXGLMForCausalLM,
LlamaConfig,
MistralConfig,
MixtralConfig,
@@ -195,6 +196,9 @@
gaudi_wav2vec2_forward,
gaudi_wav2vec2_tdnnlayer_forward,
gaudi_wav2vec2forctc_forward,
gaudi_xglm_attention_forward,
gaudi_xglm_decoder_layer_forward,
gaudi_xglm_model_forward,
)


@@ -561,3 +565,9 @@ def adapt_transformers_to_gaudi():

transformers.AutoConfig.register("deci", DeciLMConfig)
transformers.AutoModelForCausalLM.register(DeciLMConfig, DeciLMForCausalLM)

# Optimization for xglm on Gaudi
transformers.models.xglm.modeling_xglm.XGLMForCausalLM = GaudiXGLMForCausalLM
transformers.models.xglm.modeling_xglm.XGLMModel.forward = gaudi_xglm_model_forward
transformers.models.xglm.modeling_xglm.XGLMAttention.forward = gaudi_xglm_attention_forward
transformers.models.xglm.modeling_xglm.XGLMDecoderLayer.forward = gaudi_xglm_decoder_layer_forward
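
The block above monkey-patches the stock transformers XGLM symbols. A small sanity-check sketch (assuming optimum-habana with this commit is installed) shows how the substitution can be observed after calling adapt_transformers_to_gaudi():

import transformers
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
from optimum.habana.transformers.models import (
    GaudiXGLMForCausalLM,
    gaudi_xglm_attention_forward,
)

adapt_transformers_to_gaudi()

xglm_module = transformers.models.xglm.modeling_xglm
# The causal-LM class is replaced wholesale...
assert xglm_module.XGLMForCausalLM is GaudiXGLMForCausalLM
# ...while XGLMAttention keeps its class and only its forward is swapped for the Gaudi one.
assert xglm_module.XGLMAttention.forward is gaudi_xglm_attention_forward
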
6 changes: 6 additions & 0 deletions optimum/habana/transformers/models/__init__.py
@@ -226,3 +226,9 @@
gaudi_wav2vec2_tdnnlayer_forward,
gaudi_wav2vec2forctc_forward,
)
from .xglm import (
gaudi_xglm_attention_forward,
gaudi_xglm_decoder_layer_forward,
gaudi_xglm_model_forward,
GaudiXGLMForCausalLM,
)
6 changes: 6 additions & 0 deletions optimum/habana/transformers/models/xglm/__init__.py
@@ -0,0 +1,6 @@
from .modeling_xglm import (
gaudi_xglm_attention_forward,
gaudi_xglm_decoder_layer_forward,
gaudi_xglm_model_forward,
GaudiXGLMForCausalLM,
)
520 changes: 520 additions & 0 deletions optimum/habana/transformers/models/xglm/modeling_xglm.py (new file; diff not loaded in this view)
