From 27244b26c9bd314ecfe95ee2884d945f5d7f15d9 Mon Sep 17 00:00:00 2001
From: MengqingCao
Date: Tue, 24 Dec 2024 11:51:41 +0000
Subject: [PATCH] support internvl and llava

Signed-off-by: MengqingCao
---
 vllm/attention/layer.py                  | 3 ++-
 vllm/model_executor/models/intern_vit.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 05d997279893b..792fbd23c8428 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -227,7 +227,8 @@ def forward(
                                                                 key,
                                                                 value,
                                                                 scale=self.scale)
-        out = out.transpose(1, 2)
+        # TODO (cmq): revert me after op `transpose` in torch-npu is fixed
+        out = out.transpose(1, 2).contiguous()
         return out.view(bsz, q_len, -1)
 
 
diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 7ff68bd60e8ad..1dec65dce2cbe 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -271,7 +271,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         v = v.transpose(1, 2)
 
         x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
-        x = x.transpose(1, 2).view(B, N, -1)
+        # TODO (cmq): revert me after op `transpose` in torch-npu is fixed
+        x = x.transpose(1, 2).contiguous().view(B, N, -1)
 
         x = self.proj(x)
         return x
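
Note (reviewer sketch, not part of the patch): the TODO attributes the added `.contiguous()` to a torch-npu `transpose` issue; independently of that backend detail, PyTorch's `Tensor.view()` requires a stride-compatible (typically contiguous) layout, while `transpose()` only swaps strides and returns a non-contiguous view, so materializing the tensor before `view()` is what makes the reshape safe. A minimal standalone illustration with hypothetical shapes, not taken from the patched files:

    import torch

    bsz, num_heads, q_len, head_dim = 2, 8, 16, 64
    out = torch.randn(bsz, num_heads, q_len, head_dim)  # layout of an SDPA-style output

    t = out.transpose(1, 2)       # (bsz, q_len, num_heads, head_dim), shares storage with `out`
    print(t.is_contiguous())      # False: transpose only permutes strides

    try:
        t.view(bsz, q_len, -1)    # view() cannot merge the swapped dims for these strides
    except RuntimeError as err:
        print(err)                # "view size is not compatible with input tensor's size and stride ..."

    merged = t.contiguous().view(bsz, q_len, -1)  # copy into a dense layout, then reshape
    print(merged.shape)           # torch.Size([2, 16, 512])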