diff --git a/nncf/quantization/algorithms/weight_compression/scale_estimation.py b/nncf/quantization/algorithms/weight_compression/scale_estimation.py index 487001cdcc..a557253085 100644 --- a/nncf/quantization/algorithms/weight_compression/scale_estimation.py +++ b/nncf/quantization/algorithms/weight_compression/scale_estimation.py @@ -369,6 +369,8 @@ def calculate_quantization_params( if config.group_size == -1: result_scale = fns.squeeze(result_scale, axis=1) + if zp is not None and config.group_size == -1: + zp = fns.squeeze(zp, axis=1) return result_scale, zp diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index 243f128f4c..347c299a50 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -1075,7 +1075,7 @@ def test_compressed_weighs_range(mode, data): @pytest.mark.parametrize( - "configuration", + ("config", "precompute_scale", "precompute_zero_point", "raises"), [ (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), False, False, False), (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), True, True, False), @@ -1091,8 +1091,7 @@ def test_compressed_weighs_range(mode, data): (WeightCompressionConfig(CompressWeightsMode.INT4_SYM), False, False, False), ], ) -def test_int_quantization_with_precomputed_parameters(configuration): - config, precompute_scale, precompute_zero_point, raises = configuration +def test_int_quantization_with_precomputed_parameters(config, precompute_scale, precompute_zero_point, raises): is_asym = config.mode in [CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT8_ASYM] precomputed_scale, precomputed_zero_point = None, None