[Model] Add Internlm2 LoRA support #5064

Merged (14 commits) on Nov 28, 2024
2 changes: 1 addition & 1 deletion docs/source/models/supported_models.rst
@@ -177,7 +177,7 @@ Text Generation
   * - :code:`InternLM2ForCausalLM`
     - InternLM2
     - :code:`internlm/internlm2-7b`, :code:`internlm/internlm2-chat-7b`, etc.
-    -
+    - ✅︎
     - ✅︎
   * - :code:`JAISLMHeadModel`
     - Jais
22 changes: 20 additions & 2 deletions vllm/model_executor/models/internlm2.py
@@ -27,7 +27,7 @@
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
 
-from .interfaces import SupportsPP
+from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (is_pp_missing_parameter,
                     make_empty_intermediate_tensors_factory, make_layers,
                     maybe_prefix)
@@ -319,7 +319,21 @@ def forward(
         return hidden_states
 
 
-class InternLM2ForCausalLM(nn.Module, SupportsPP):
+class InternLM2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
+    packed_modules_mapping = {
+        "wqkv": ["wqkv"],
+        "gate_up_proj": ["w1", "w3"],
+    }
+
+    # LoRA specific attributes
+    supported_lora_modules = [
+        "wqkv",
+        "wo",
+        "gate_up_proj",
+        "w2",
+    ]
+    embedding_modules = {}
+    embedding_padding_modules = []
 
     def __init__(self,
                  *,
@@ -329,8 +343,12 @@ def __init__(self,
         super().__init__()
         config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
+        lora_config = vllm_config.lora_config
+
         self.config = config
         self.quant_config = quant_config
+        self.lora_config = lora_config
+
         self.model = model_type(vllm_config=vllm_config,
                                 prefix=maybe_prefix(prefix, "model"))
         self.output = ParallelLMHead(config.vocab_size,
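
With this change, InternLM2ForCausalLM implements the SupportsLoRA interface, so LoRA adapters can target the attention projections (wqkv, wo) and the MLP projections (gate_up_proj, w2); the packed_modules_mapping tells the LoRA loader that a checkpoint's separate w1 and w3 weights fold into the fused gate_up_proj module. Below is a minimal offline-inference sketch of how an adapter could be applied once this lands. The adapter path ./internlm2-lora and its rank are hypothetical placeholders; the rest uses vLLM's public LLM / LoRARequest API.

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# The engine must be created with LoRA support turned on.
llm = LLM(
    model="internlm/internlm2-chat-7b",
    trust_remote_code=True,  # InternLM2 ships custom modeling/tokenizer code
    enable_lora=True,
    max_lora_rank=64,  # raise if the adapter was trained with a larger rank
)

sampling_params = SamplingParams(temperature=0.0, max_tokens=64)

# LoRARequest(name, id, path); the integer id must be unique and positive.
# "./internlm2-lora" is a placeholder for a real InternLM2-compatible adapter.
outputs = llm.generate(
    ["Give me a short introduction to large language models."],
    sampling_params,
    lora_request=LoRARequest("internlm2-adapter", 1, "./internlm2-lora"),
)

for output in outputs:
    print(output.outputs[0].text)

An adapter trained against separate w1/w3 (gate/up) projections should load without renaming: the packed_modules_mapping above is what lets vLLM stack those LoRA weights into the single fused gate_up_proj layer at load time.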