
Commit

skip tests
Signed-off-by: Tyler Michael Smith <[email protected]>
tlrmchlsmth committed Dec 18, 2024
1 parent fbc974d commit 117c978
Showing 4 changed files with 21 additions and 17 deletions.
5 changes: 4 additions & 1 deletion tests/kernels/test_semi_structured.py
@@ -8,6 +8,8 @@
import torch

from vllm import _custom_ops as ops
+from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
+    sparse_cutlass_supported)
from vllm.platforms import current_platform

CUDA_DEVICES = [
@@ -102,10 +104,11 @@ def baseline_scaled_mm(a: torch.Tensor,
return output


-@pytest.mark.skipif(not current_platform.has_device_capability(90),
+@pytest.mark.skipif(not sparse_cutlass_supported(),
reason="Sparse FP8 is not yet supported on this GPU type.")
# Test working with a subset of A and B for sparse matmul
def test_cutlass_sparse_subset():

big_m = 1024
m, n, k = 512, 512, 512

8 changes: 5 additions & 3 deletions tests/quantization/test_compressed_tensors.py
@@ -14,6 +14,8 @@
CompressedTensorsW4A16Sparse24, CompressedTensorsW8A8Fp8,
CompressedTensorsW8A8Int8, CompressedTensorsW8A16Fp8,
CompressedTensorsWNA16)
+from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
+    sparse_cutlass_supported)
from vllm.platforms import current_platform


@@ -212,7 +214,7 @@ def test_compressed_tensors_kv_cache(vllm_runner):
assert output


-@pytest.mark.skipif(not current_platform.has_device_capability(90),
+@pytest.mark.skipif(not sparse_cutlass_supported(),
reason="Sparse FP8 is not yet supported on this GPU type.")
def _test_2of4_quant_models(qkv_proj, weight_strategy, input_strategy):
assert isinstance(qkv_proj.quant_method, CompressedTensorsLinearMethod)
@@ -254,7 +256,7 @@ def test_compressed_tensors_2of4_quant_fp8(vllm_runner, args_2of4):
assert output


-@pytest.mark.skipif(not current_platform.has_device_capability(90),
+@pytest.mark.skipif(not sparse_cutlass_supported(),
reason="Sparse FP8 is not yet supported on this GPU type.")
@pytest.mark.parametrize("args_2of4", [
("nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Channel-Weight-testing",
@@ -279,7 +281,7 @@ def test_compressed_tensors_2of4_quant_int8(vllm_runner, args_2of4):
assert output


-@pytest.mark.skipif(not current_platform.has_device_capability(90),
+@pytest.mark.skipif(not sparse_cutlass_supported(),
reason="Sparse FP8 is not yet supported on this GPU type.")
@pytest.mark.parametrize(
"args_2of4",
14 changes: 1 addition & 13 deletions vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
@@ -9,27 +9,15 @@
from vllm.model_executor.layers.quantization.compressed_tensors.schemes import (
CompressedTensorsScheme)
from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
-    convert_to_channelwise)
+    convert_to_channelwise, sparse_cutlass_supported)
from vllm.model_executor.parameter import (BasevLLMParameter,
ChannelQuantScaleParameter,
ModelWeightParameter,
PerTensorScaleParameter)
-from vllm.platforms import current_platform

__all__ = ["CompressedTensors24"]


-def sparse_cutlass_supported() -> bool:
-    # sparse cutlass is not supported on Rocm
-    if current_platform.is_rocm():
-        return False
-
-    capability_tuple = current_platform.get_device_capability()
-    capability = -1 if capability_tuple is None else capability_tuple.to_int()
-
-    return ops.cutlass_sparse_scaled_mm_supported(capability)
-
-
class CompressedTensors24(CompressedTensorsScheme):

def __init__(self,
11 changes: 11 additions & 0 deletions vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -10,6 +10,17 @@
TORCH_DEVICE_IDENTITY = torch.ones(1, dtype=torch.float32)


+def sparse_cutlass_supported() -> bool:
+    # sparse cutlass is not supported on Rocm
+    if current_platform.is_rocm():
+        return False
+
+    capability_tuple = current_platform.get_device_capability()
+    capability = -1 if capability_tuple is None else capability_tuple.to_int()
+
+    return ops.cutlass_sparse_scaled_mm_supported(capability)
+
+
def cutlass_fp8_supported() -> bool:
    # cutlass is not supported on Rocm
    if current_platform.is_rocm():
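
As an aside (not part of this commit): with the helper now living in w8a8_utils, any test module can guard on sparse CUTLASS support without importing the CompressedTensors24 scheme. The sketch below is illustrative only; the test name and body are assumptions, not code from the repository.

import pytest

from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
    sparse_cutlass_supported)


# Hypothetical test sketch: skip on hardware without sparse CUTLASS support.
@pytest.mark.skipif(not sparse_cutlass_supported(),
                    reason="Sparse FP8 is not yet supported on this GPU type.")
def test_sparse_cutlass_guard_example():
    # Placeholder body; a real test would exercise a 2:4 sparse kernel here.
    assert sparse_cutlass_supported()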
