Fix GPTQ for inputs with batch size != 1 and with seq len == 1 #3002 (Merged)
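This merged PR makes two kinds of changes across the weight compression code. First, GPTQ now slices activations along the last (hidden) dimension instead of relying on `squeeze()`, which broke for batch size != 1 and for sequence length == 1, and it explicitly rejects inputs it cannot handle (transposed MatMul inputs, convolution metatypes). Second, validation and failure paths in `nncf.quantize()` and `nncf.compress_weights()` are migrated from built-in exceptions (`RuntimeError`, `ValueError`, `AttributeError`, `NotImplementedError`) to NNCF-specific ones (`nncf.UnsupportedModelError`, `nncf.UnsupportedBackendError`, `nncf.ParameterNotSupportedError`, `nncf.ValidationError`, `nncf.InternalError`).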
```diff
@@ -579,6 +579,8 @@ def _collect_statistics(self, dataset: Dataset, nodes: List[NNCFNode], graph: NNCFGraph)
         # activation as an input.
         matmul_input_to_output_nodes_map = defaultdict(list)
         for node in matmul_nodes:
+            if node.layer_attributes.input_attributes["transpose"]:
+                raise nncf.UnsupportedModelError("Transposed input is not supported")
             act_node, output_port_id = self._get_activation_node_and_port(node, graph)
             matmul_input_to_output_nodes_map[(act_node, output_port_id)].append(node)

```
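The new guard encodes the assumption used throughout this PR: activations are laid out as `[..., hidden]`, with the hidden dimension last. A small illustrative sketch (the shapes are invented; `reduction_axes` mirrors the "all dimensions except the last one" convention from the review comment further down):

```python
import numpy as np

# Activations are assumed to be [..., hidden]: any number of leading dims
# (batch, sequence, ...) followed by the hidden dimension last.
batch, seq_len, hidden = 4, 1, 8
acts = np.random.rand(batch, seq_len, hidden)

# A MatMul with a transposed input would make the hidden dim non-last, so
# per-hidden-channel statistics could no longer be reduced over "all axes
# except the last" - hence the explicit rejection above.
reduction_axes = tuple(range(acts.ndim - 1))   # (0, 1) here
per_channel_mean = acts.mean(axis=reduction_axes)
assert per_channel_mean.shape == (hidden,)
```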
nncf/quantization/algorithms/weight_compression/awq.py (3 changes: 2 additions & 1 deletion)

```diff
@@ -13,6 +13,7 @@
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, TypeVar

+import nncf
 from nncf import Dataset
 from nncf import nncf_logger
 from nncf.common.factory import ModelTransformerFactory
@@ -117,7 +118,7 @@ def _set_backend_entity(self, model: TModel) -> None:
             self._backend_entity = OVAWQAlgoAlgoBackend(model, self.name_to_node_mapping)
             self._patterns = self._backend_entity.get_awq_patterns()
         else:
-            raise RuntimeError(
+            raise nncf.UnsupportedBackendError(
                 "Cannot return backend-specific AWQ entity because {} is not supported!".format(model_backend.value)
             )
```
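A recurring theme in this diff: generic built-ins are swapped for NNCF's own exception types, so callers can discriminate failure categories. A minimal sketch of the calling side, assuming these classes are exported at the `nncf` top level as the diff implies:

```python
import nncf

def run_awq_compression(model, dataset):
    """Illustrative only: route NNCF failure categories differently."""
    try:
        return nncf.compress_weights(
            model, mode=nncf.CompressWeightsMode.INT4_SYM, awq=True, dataset=dataset
        )
    except nncf.UnsupportedBackendError:
        raise  # model framework has no AWQ backend (the hunk above)
    except nncf.UnsupportedModelError:
        raise  # model structure not handled, e.g. transposed matmul inputs
    except (nncf.ParameterNotSupportedError, nncf.ValidationError) as err:
        raise ValueError(f"bad compression config: {err}") from err
```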
nncf/quantization/algorithms/weight_compression/gptq.py (6 changes: 3 additions & 3 deletions)

```diff
@@ -169,9 +169,9 @@ def _calculate_hessian(self, node: NNCFNode, inputs: List[Tensor]) -> Tensor:
         nsamples = 0

         if node.metatype in self._backend_entity.convolution_metatypes:
-            raise RuntimeError("Convolution metatypes are not supported")
+            raise nncf.UnsupportedModelError("Convolution metatypes are not supported")
         if node.layer_attributes.input_attributes["transpose"]:
-            raise RuntimeError("Transpose is not supported")
+            raise nncf.UnsupportedModelError("Transposed input is not supported")

         hessian = fns.zeros(
             (inputs[0].shape[-1], inputs[0].shape[-1]), backend=inputs[0].backend, dtype=TensorDataType.float32
```
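The Hessian is allocated as a square matrix over the hidden dimension (`inputs[0].shape[-1]`), which is what makes the "hidden dim last" assumption load-bearing here. A rough numpy sketch of the accumulation (this is the standard GPTQ running-average update, not code copied from this file; the real method works on backend tensors via nncf's `fns` wrappers):

```python
import numpy as np

def accumulate_hessian(batches):
    """GPTQ-style Hessian over activation batches of shape [..., hidden]."""
    hidden = batches[0].shape[-1]
    hessian = np.zeros((hidden, hidden), dtype=np.float32)
    nsamples = 0
    for inp in batches:
        # Flatten all leading dims; works for any batch/seq shape.
        x = inp.reshape(-1, hidden).T.astype(np.float32)  # [hidden, n]
        n = x.shape[1]
        hessian *= nsamples / (nsamples + n)  # rescale previous average
        nsamples += n
        x = np.sqrt(2.0 / nsamples) * x
        hessian += x @ x.T                    # running 2/N * sum(x x^T)
    return hessian
```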
```diff
@@ -264,7 +264,7 @@ def _quantize_weights(
                     scales.append(scale)
                 else:
                     if self._scale_estimation and block_compression_config.num_bits == 4:
-                        activations = [inp.squeeze()[:, (i1 + i) : (i1 + i + group_size)] for inp in inputs]
+                        activations = [inp[..., (i1 + i) : (i1 + i + group_size)] for inp in inputs]
                         wc_statistics = ScaleEstimation.activations_to_wc_statistics(activations)
                         scale, zero_point = ScaleEstimation.calculate_quantization_params(
                             self._backend_entity,
```

**@ljaljushkin** (Contributor, Author) commented on Oct 23, 2024:

> Slice by the last dimension, which is supposed to be the hidden one.
> This is aligned with how statistics are processed in `activations_to_wc_statistics`, where the reduction axes are all dimensions except the last one. @nikita-savelyevv
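This one-line change is the crux of the PR title. A quick numpy demonstration of how the old `squeeze()`-based slicing failed for batch size != 1 and for seq len == 1, while slicing the last axis handles both (shapes are illustrative):

```python
import numpy as np

hidden, group_size, i1, i = 8, 4, 0, 0
cols = slice(i1 + i, i1 + i + group_size)

# Old behavior: inp.squeeze()[:, cols] only worked when squeeze() left a
# 2-D [tokens, hidden] array (i.e. exactly one non-trivial leading dim).

# Case 1: batch=2, seq=3 - squeeze() keeps all 3 dims, so [:, cols]
# slices the sequence axis and leaves the hidden axis untouched.
inp = np.random.rand(2, 3, hidden)
assert inp.squeeze()[:, cols].shape == (2, group_size, hidden)  # wrong axis

# Case 2: batch=1, seq=1 - squeeze() collapses to 1-D and [:, cols] raises.
inp = np.random.rand(1, 1, hidden)
try:
    inp.squeeze()[:, cols]
except IndexError:
    pass  # too many indices for a 1-D array

# New behavior: slicing the last axis works for any leading shape.
for shape in [(2, 3, hidden), (1, 1, hidden), (5, hidden)]:
    inp = np.random.rand(*shape)
    assert inp[..., cols].shape == shape[:-1] + (group_size,)
```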
```diff
@@ -14,6 +14,7 @@
 import matplotlib.pyplot as plt
 import pandas as pd

+import nncf
 from nncf.common.logging import nncf_logger
 from nncf.common.tensor_statistics.statistics import WCTensorStatistic
 from nncf.common.utils.debug import DEBUG_LOG_DIR
@@ -178,7 +179,7 @@ def calculate_low_rank_matrices(
         indexes = do_nf4_quantization(compressed_weight.tensor, compressed_weight.scale, is_normalized_weight=True)
         fq_weights = do_nf4_dequantization(indexes, compressed_weight.scale, reduction_axis)
     else:
-        raise ValueError(
+        raise nncf.InternalError(
             f"{mode.value} mode is invalid for Lora Correction algorithm. Supported modes: INT4_SYM, INT4_ASYM, NF4"
         )
     # fq_w + residual = w => residual = w - fq_w
```
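The in-code comment states the correction target: `fq_w + residual = w`, so `residual = w - fq_w`. A hedged sketch of the low-rank idea behind `calculate_low_rank_matrices` (illustrative numpy; the SVD-based factorization and the `rank` parameter are assumptions, not read from this diff):

```python
import numpy as np

def low_rank_residual(w, fq_w, rank):
    """Approximate the quantization residual with a rank-`rank` product."""
    residual = w - fq_w                # fq_w + residual = w
    u, s, vt = np.linalg.svd(residual, full_matrices=False)
    a = u[:, :rank] * s[:rank]         # [out, rank]
    b = vt[:rank, :]                   # [rank, in]
    return a, b                        # fq_w + a @ b approximates w
```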
```diff
@@ -13,6 +13,7 @@
 import openvino as ov
 from openvino.runtime import opset13 as opset

+import nncf
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
 from nncf.common.graph.operator_metatypes import OperatorMetatype
@@ -236,7 +237,7 @@ def _create_compression_subgraph(
         elif compression_config.mode == CompressWeightsMode.INT8_ASYM:
             compression_dtype = ov.Type.u8
         else:
-            raise ValueError(f"{compression_config.mode.value} is not supported.")
+            raise nncf.ParameterNotSupportedError(f"{compression_config.mode.value} is not supported.")

         original_shape = weight.shape
         compressed_weight = compress_weight(weight, reduction_axes, compression_config, layer_scales, layer_zero_points)
```
```diff
@@ -12,6 +12,7 @@
 from copy import deepcopy
 from typing import Any, Dict, List, Optional, Tuple, TypeVar

+import nncf
 from nncf import Dataset
 from nncf.common.graph.graph import NNCFGraph
 from nncf.common.graph.graph import NNCFNode
@@ -101,7 +102,7 @@ def _set_backend_entity(self, model: TModel) -> None:

             self._backend_entity = OVWeightCompressionAlgoBackend(model, self.name_to_node_mapping)
         else:
-            raise RuntimeError(
+            raise nncf.UnsupportedBackendError(
                 "Cannot return backend-specific AWQ entity because {} is not supported!".format(model_backend.value)
             )
```
```diff
@@ -217,7 +217,7 @@ def transform_model(
                 CompressWeightsMode.INT8_SYM,
                 CompressWeightsMode.INT8,
             ]:
-                raise ValueError(f"{compression_config.mode.value} is not supported.")
+                raise nncf.ParameterNotSupportedError(f"{compression_config.mode.value} is not supported.")

             weight_node = get_const_node(wc_params.node_with_weight, wc_params.weight_port_id, graph)
             weight_name = weight_node.layer_attributes.name
```
```diff
@@ -181,7 +181,7 @@ def transform_model(
                 CompressWeightsMode.INT8_SYM,
                 CompressWeightsMode.INT8,
             ]:
-                raise ValueError(f"{compression_config.mode.value} is not supported.")
+                raise nncf.ParameterNotSupportedError(f"{compression_config.mode.value} is not supported.")
             weight_node = get_const_node(wc_params.node_with_weight, wc_params.weight_port_id, graph)
             weight_name = weight_node.node_name
             weight = self.get_weight(wc_params.node_with_weight, wc_params.weight_port_id, model, graph)
```
```diff
@@ -82,12 +82,14 @@ def reshape_weight_for_grouped_quantization(
     if isinstance(reduction_axes, tuple) and len(reduction_axes) == 1:
         reduction_axes = reduction_axes[0]
     if not isinstance(reduction_axes, int):
-        raise NotImplementedError(
+        raise nncf.UnsupportedModelError(
             f"Group-wise quantization expects a single reduction axis, but given: {reduction_axes}."
         )
     channel_size = weight.shape[reduction_axes]
     if channel_size % group_size != 0:
-        raise nncf.ValidationError(f"Channel size {channel_size} should be divisible by size of group {group_size}")
+        raise nncf.UnsupportedModelError(
+            f"Channel size {channel_size} should be divisible by size of group {group_size}"
+        )

     num_groups_per_channel = channel_size // group_size
     shape = list(weight.shape)  # [a1, r, a2] - "r" refers to number of channels along reduction axis
```
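To make the divisibility requirement concrete, here is a sketch of the group-wise reshape this function performs, following the `[a1, r, a2]` naming from the in-code comment (illustrative numpy; the real implementation uses nncf Tensor ops and the error types above):

```python
import numpy as np

def reshape_for_grouped_quantization(weight, reduction_axis, group_size):
    channel_size = weight.shape[reduction_axis]
    if channel_size % group_size != 0:
        raise ValueError(f"Channel size {channel_size} should be divisible by size of group {group_size}")
    shape = list(weight.shape)
    # Split the reduction axis r into [r // group_size, group_size].
    shape[reduction_axis : reduction_axis + 1] = [channel_size // group_size, group_size]
    return weight.reshape(shape)

w = np.arange(2 * 12 * 3).reshape(2, 12, 3)           # [a1, r, a2], r = 12
grouped = reshape_for_grouped_quantization(w, 1, 4)   # group_size = 4
assert grouped.shape == (2, 3, 4, 3)                  # [a1, r // g, g, a2]
```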
nncf/quantization/quantize_model.py (36 changes: 18 additions & 18 deletions)

```diff
@@ -161,7 +161,7 @@ def quantize(
     :rtype: TModel
     """
     if subset_size < 1:
-        raise ValueError("Subset size must be positive.")
+        raise nncf.ValidationError("Subset size must be positive.")

     advanced_parameters = _update_advanced_quantization_parameters(advanced_parameters, calibration_dataset)

@@ -471,30 +471,30 @@ def compress_weights(
         from nncf.torch.quantization.quantize_model import compress_weights_impl as pt_compression_weights_impl

         if mode not in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM]:
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "Torch backend supports only INT8_ASYM, INT8_SYM modes for weight compression, "
                 f"but given {mode.value} mode."
             )

         if True in [awq, scale_estimation, gptq, lora_correction]:
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "Torch backend does not support 'awq', 'scale_estimation', 'gptq' and 'lora_correction' options. "
                 "Set them to None."
             )

         if backup_mode is not None:
-            raise AttributeError("Torch backend does not support backup_mode option.")
+            raise nncf.ParameterNotSupportedError("Torch backend does not support backup_mode option.")

         if is_wrapped_model(model):
             if not model.nncf.trace_parameters:
-                raise ValueError(
+                raise nncf.ValidationError(
                     "Tracing capabilities with tracing parameters are required in the PyTorch model "
                     "for nncf.compress_weights(). Please wrap the model using "
                     "nncf.torch.wrap_model(model, example_input, trace_parameters=True) before calling "
                     "nncf.compress_weights()."
                 )
         elif dataset is None:
-            raise AttributeError("Please provide a dataset of at least one element for PyTorch model tracing.")
+            raise nncf.ValidationError("Please provide a dataset of at least one element for PyTorch model tracing.")
         else:
             example_input = next(iter(dataset.get_inference_data()))
             model = wrap_model(model, example_input=example_input, trace_parameters=True)
@@ -507,21 +507,21 @@
         )

         if mode not in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM]:
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "TorchFX backend supports only INT8_ASYM, INT8_SYM modes for weight compression, "
                 f"but given {mode.value} mode."
             )

         if backup_mode is not None:
-            raise AttributeError("TorchFX backend does not support backup_mode option.")
+            raise nncf.ParameterNotSupportedError("TorchFX backend does not support backup_mode option.")

         if any((awq, scale_estimation, gptq, lora_correction)):
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "TorchFX backend does not support 'awq', 'scale_estimation', 'gptq',"
                 "and 'lora_correction' options. Set them to None."
             )
         if dataset:
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "TorchFX only supports data-free weights compression," "Set the 'dataset' option to None"
             )
         compression_weights_impl = fx_compression_weights_impl
@@ -532,13 +532,13 @@
         if any((awq, scale_estimation, gptq, lora_correction)) and (
             dataset is None or mode == CompressWeightsMode.E2M1
         ):
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "Scale estimation, AWQ, GPTQ or Lora Correction algorithm is defined, "
                 "but dataset is None or mode is E2M1."
             )

         if gptq and lora_correction:
-            raise AttributeError(
+            raise nncf.ValidationError(
                 "Simultaneous use of Lora correction and GPTQ algorithms is not supported. Select one of them."
             )

@@ -550,13 +550,13 @@
         if group_size is None:
             group_size = -1
         if ratio != 1 or group_size != -1:
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 "INT8 modes assume per-channel quantization of all layers in 8 bit. "
                 "Default values of `ratio` (1) and `group_size` (-1) parameters can not be overridden"
             )

         if backup_mode is not None:
-            raise AttributeError("INT8 modes do not support the `backup_mode` option")
+            raise nncf.ParameterNotSupportedError("INT8 modes do not support the `backup_mode` option")

         options = {
             "all_layers": all_layers,
@@ -569,7 +569,7 @@
         }
         unsupported_for_int8 = [name for name, value in options.items() if value is not None]
         if unsupported_for_int8:
-            raise AttributeError(
+            raise nncf.ParameterNotSupportedError(
                 f"INT8 modes do not support {', '.join(unsupported_for_int8)} option(s). Set them to None."
             )

@@ -598,14 +598,14 @@ def compress_weights(
     if backup_mode is None:
         backup_mode = BackupMode.INT8_ASYM
     if ratio != 1 and dataset is None and sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR:
-        raise AttributeError(
+        raise nncf.ValidationError(
             f"Mixed precision selection based on the given sensitivity metric={sensitivity_metric.value} requires "
             "a dataset, but it's not provided."
         )
     if ratio < 0 or ratio > 1:
-        raise ValueError(f"The ratio should be between 0 and 1, but ratio={ratio} is specified.")
+        raise nncf.ValidationError(f"The ratio should be between 0 and 1, but ratio={ratio} is specified.")
     if subset_size is None or subset_size <= 0:
-        raise ValueError(f"The subset_size value should be positive, but subset_size={subset_size} is given.")
+        raise nncf.ValidationError(f"The subset_size value should be positive, but subset_size={subset_size} is given.")

     if compression_weights_impl is None:
         raise nncf.UnsupportedBackendError(f"Unsupported type of backend: {backend}")
```
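Taken together, these checks mean invalid `compress_weights` configurations now surface as NNCF error types instead of `AttributeError`/`ValueError`. A hedged sketch of what a caller sees (model setup omitted; INT8 plus a custom `group_size` is one of the combinations rejected above):

```python
import nncf

def try_int8_with_group_size(model):
    # INT8 modes are per-channel; overriding group_size is rejected above.
    try:
        return nncf.compress_weights(
            model, mode=nncf.CompressWeightsMode.INT8_SYM, group_size=128
        )
    except nncf.ParameterNotSupportedError as err:
        print(f"rejected: {err}")
        return None
```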
tests/openvino/native/quantization/test_quantize_api.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -32,6 +32,6 @@ def get_mock_model() -> Model:
 def test_non_positive_subset_size():
     model_to_test = get_mock_model()

-    with pytest.raises(ValueError) as e:
+    with pytest.raises(nncf.ValidationError) as e:
         nncf.quantize(model_to_test, Dataset(MockDataset(INPUT_SHAPE)), subset_size=0)
     assert "Subset size must be positive." in str(e.value)
```