Skip to content

Commit

Permalink
Merge branch 'main' into xglm
Browse files Browse the repository at this point in the history
  • Loading branch information
XinyuYe-Intel authored Sep 26, 2024
2 parents f748171 + a4dbcff commit 48dc721
Show file tree
Hide file tree
Showing 10 changed files with 494 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ The following model architectures, tasks and device distributions have been vali
| BLOOM(Z) | | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| StarCoder / StarCoder2 | :heavy_check_mark: | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| GPT-J | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>Single card</li><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| GPT-Neo | | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| GPT-NeoX | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| OPT | | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| Llama 2 / CodeLlama / Llama 3 / Llama Guard / Granite | :heavy_check_mark: | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)</li> |
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
| BLOOM(Z) | | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| StarCoder / StarCoder2 || <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| GPT-J | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>Single card</li><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| GPT-Neo | | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| GPT-NeoX | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| OPT | | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
| Llama 2 / CodeLlama / Llama 3 / Llama Guard / Granite ||| <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)</li> |
Expand Down
2 changes: 1 addition & 1 deletion examples/image-to-text/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def main():
args.image_path = [
"https://github.com/haotian-liu/LLaVA/blob/1a91fc274d7c35a9b50b3cb29c4247ae5837ce39/images/llava_v1_5_radar.jpg?raw=true"
]
if args.prompt is None:
if args.prompt is None and model_type in ("llava", "llava_next"):
if model_type == "llava":
processor = LlavaProcessor.from_pretrained(args.model_name_or_path)
elif model_type == "llava_next":
Expand Down
1 change: 1 addition & 0 deletions optimum/habana/transformers/generation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"gpt2",
"opt",
"gptj",
"gpt_neo",
"gpt_neox",
"llama",
"falcon",
Expand Down
45 changes: 45 additions & 0 deletions optimum/habana/transformers/modeling_attn_mask_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,51 @@ def _make_causal_mask(

return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length)

def to_4d(
    self,
    attention_mask_2d: torch.Tensor,
    query_length: int,
    dtype: torch.dtype,
    key_value_length: Optional[int] = None,
) -> torch.Tensor:
    """
    Converts 2D attention mask to 4D attention mask by expanding mask to (bsz, head_dim=1, query_length,
    key_value_length) shape and by adding a large negative bias to not-attended positions. If attention_mask is
    causal, a causal mask will be added.

    Args:
        attention_mask_2d: padding mask of shape (bsz, src_seq_len); positions equal to 1 are attended,
            any other value is treated as padding (see the `!= 1.0` test below).
        query_length: number of query (target) positions for this forward pass.
        dtype: floating dtype of the returned mask; masked positions are filled with `torch.finfo(dtype).min`.
        key_value_length: total key/value length (query_length + past length). Required when the converter
            is causal.

    Returns:
        A (bsz, 1, query_length, key_value_length) additive attention mask in `dtype`.

    Raises:
        ValueError: if the converter is causal and `key_value_length` is not given.
        NotImplementedError: if a sliding window is configured for a non-causal mask.
    """
    input_shape = (attention_mask_2d.shape[0], query_length)
    device = attention_mask_2d.device

    # create causal mask
    # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
    if (input_shape[-1] > 1 or self.sliding_window is not None) and self.is_causal:
        if key_value_length is None:
            raise ValueError(
                "This attention mask converter is causal. Make sure to pass `key_value_length` to correctly create a causal mask."
            )
        # Past length is whatever part of key_value_length is not covered by the current queries.
        past_key_values_length = key_value_length - query_length
        causal_4d_mask = self._make_causal_mask(
            input_shape,
            dtype,
            device=device,
            past_key_values_length=past_key_values_length,
            sliding_window=self.sliding_window,
        )

        # just create a bool tensor with shape [bsz, 1, tgt_seq_len, src_seq_len]
        # OOM problem can be prevented by using a bool tensor
        # NOTE(review): this deviates from upstream transformers, which expands the padding mask into a
        # float mask and adds it; here the padding mask stays boolean and is merged via masked_fill,
        # presumably to reduce memory pressure on HPU — confirm against upstream AttentionMaskConverter.
        bsz, src_len = attention_mask_2d.size()
        tgt_len = input_shape[-1] if input_shape[-1] is not None else src_len
        # True where the 2D mask is NOT exactly 1, i.e. at padded (not-attended) positions.
        bool_mask = attention_mask_2d != 1.0
        expanded_attn_mask = bool_mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(device=device)

        # Fill padded positions of the causal mask with the most negative representable value
        # so they are effectively ignored by the subsequent softmax.
        return causal_4d_mask.masked_fill(expanded_attn_mask, torch.finfo(dtype).min)
    elif self.sliding_window is not None:
        raise NotImplementedError("Sliding window is currently only implemented for causal masking")

    # Non-causal path: only expand the padding mask.
    # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
    return self._expand_mask(attention_mask_2d, dtype, tgt_len=input_shape[-1]).to(device)


def _gaudi_prepare_4d_causal_attention_mask(
attention_mask: Optional[torch.Tensor],
Expand Down
12 changes: 12 additions & 0 deletions optimum/habana/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
GaudiGPTJBlock,
GaudiGPTJForCausalLM,
GaudiGPTJModel,
GaudiGPTNeoForCausalLM,
GaudiGPTNeoXForCausalLM,
GaudiGPTNeoXLayer,
GaudiLlamaAttention,
Expand Down Expand Up @@ -152,6 +153,10 @@
gaudi_gpt2_forward,
gaudi_gpt_bigcode_block_forward,
gaudi_gpt_bigcode_model_forward,
gaudi_gpt_neo_attention_forward,
gaudi_gpt_neo_block_forward,
gaudi_gpt_neo_model_forward,
gaudi_gpt_neo_selfattention_forward,
gaudi_gpt_neox_attention_forward,
gaudi_gpt_neox_model_forward,
gaudi_gpt_neox_rotary_embedding_set_cos_sin_cache,
Expand Down Expand Up @@ -368,6 +373,13 @@ def adapt_transformers_to_gaudi():
{"eager": GaudiGPTBigCodeAttention}
)

# Optimization for gpt-neo generation on Gaudi
transformers.models.gpt_neo.modeling_gpt_neo.GPTNeoForCausalLM = GaudiGPTNeoForCausalLM
transformers.models.gpt_neo.modeling_gpt_neo.GPTNeoModel.forward = gaudi_gpt_neo_model_forward
transformers.models.gpt_neo.modeling_gpt_neo.GPTNeoBlock.forward = gaudi_gpt_neo_block_forward
transformers.models.gpt_neo.modeling_gpt_neo.GPTNeoAttention.forward = gaudi_gpt_neo_attention_forward
transformers.models.gpt_neo.modeling_gpt_neo.GPTNeoSelfAttention.forward = gaudi_gpt_neo_selfattention_forward

# Optimization for gpt-neox generation on Gaudi
transformers.models.gpt_neox.modeling_gpt_neox.GPTNeoXForCausalLM = GaudiGPTNeoXForCausalLM
transformers.models.gpt_neox.modeling_gpt_neox.GPTNeoXModel.forward = gaudi_gpt_neox_model_forward
Expand Down
7 changes: 7 additions & 0 deletions optimum/habana/transformers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@
gaudi_gpt_bigcode_block_forward,
gaudi_gpt_bigcode_model_forward,
)
from .gpt_neo import (
GaudiGPTNeoForCausalLM,
gaudi_gpt_neo_attention_forward,
gaudi_gpt_neo_block_forward,
gaudi_gpt_neo_model_forward,
gaudi_gpt_neo_selfattention_forward,
)
from .gpt_neox import (
GaudiGPTNeoXForCausalLM,
GaudiGPTNeoXLayer,
Expand Down
7 changes: 7 additions & 0 deletions optimum/habana/transformers/models/gpt_neo/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .modeling_gpt_neo import (
GaudiGPTNeoForCausalLM,
gaudi_gpt_neo_attention_forward,
gaudi_gpt_neo_block_forward,
gaudi_gpt_neo_model_forward,
gaudi_gpt_neo_selfattention_forward,
)
Loading

0 comments on commit 48dc721

Please sign in to comment.