Initial commit
nikita-savelyevv committed Nov 1, 2024
1 parent 3ba9b06 commit 68a8696
Showing 4 changed files with 62 additions and 7 deletions.
@@ -617,7 +617,7 @@ def apply(
         else:
             if self._scale_estimation:
                 scale_estimation_params = self._advanced_parameters.scale_estimation_params
-                scales = ScaleEstimation(
+                scales, zero_points = ScaleEstimation(
                     model,
                     self._backend_entity.name_to_node_mapping,
                     all_weight_params,

@@ -112,7 +112,7 @@ def apply(
         graph: NNCFGraph,
         statistic_points: Optional[StatisticPointsContainer] = None,
         dataset: Optional[Dataset] = None,
-    ) -> Dict[str, Tensor]:
+    ) -> Tuple[Dict[str, Tensor], Dict[str, Tensor]]:
         """
         Estimates better scale for the int4 nodes in the model.
         Minimizes per-group difference between floating point MatMul and
@@ -124,10 +124,10 @@
         :param graph: Model graph.
         :param statistic_points: Statistic points with collected statistics values.
         :param dataset: A representative dataset for the calibration process.
-        :return: Dict with pairs (weight name, estimated scale).
+        :return: Two dictionaries mapping each weight name to its estimated scale and zero point.
         """

-        scales = dict()
+        scales, zero_points = dict(), dict()

         for wp in track(self._all_weight_params, description="Applying Scale Estimation"):
             weight_name = wp.weight_name
@@ -147,7 +147,7 @@

             weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph)

-            scales[weight_name], _ = self.calculate_quantization_params(
+            scales[weight_name], zero_points[weight_name] = self.calculate_quantization_params(
                 self._backend_entity,
                 stats,
                 weight,
@@ -159,7 +159,7 @@
                 self._weight_penalty,
             )

-        return scales
+        return scales, zero_points

     @staticmethod
     def calculate_quantization_params(
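
A note on why the zero point now travels with the scale: in asymmetric integer quantization the scale alone does not fix the quantization grid; the zero point shifts the grid so that a value range lopsided around zero still maps onto the unsigned integer levels. The following is a minimal, self-contained sketch of that pairing, not the NNCF implementation; the level range and rounding policy here are assumptions for illustration.

import numpy as np

def asym_quantize(w: np.ndarray, num_bits: int = 4):
    """Toy per-row asymmetric quantization returning (q, scale, zero_point)."""
    level_low, level_high = 0, 2**num_bits - 1  # e.g. 0..15 for a 4-bit asymmetric mode
    w_min = w.min(axis=-1, keepdims=True)
    w_max = w.max(axis=-1, keepdims=True)
    scale = np.maximum((w_max - w_min) / (level_high - level_low), 1e-9)
    zero_point = np.round(-w_min / scale)  # places w_min onto level_low
    q = np.clip(np.round(w / scale) + zero_point, level_low, level_high)
    return q.astype(np.uint8), scale, zero_point

w = np.linspace(-0.5, 0.5, 11, dtype=np.float32)[None, :]
q, scale, zp = asym_quantize(w)
w_hat = (q.astype(np.float32) - zp) * scale  # dequantization needs both values
assert np.max(np.abs(w - w_hat)) <= scale.max() / 2 + 1e-6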

@@ -358,6 +358,12 @@ def do_int_quantization(
"""
assert config.is_integer(), "The function supports integer quantization only"
group_size = config.group_size
is_asym = config.mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]
if is_asym and (precomputed_scale is None) != (precomputed_zero_point is None):
raise ValueError(
"If precomputed quantization parameters are provided, both scale and zero point are required "
"for asymmetric quantization."
)

if weight.dtype != TensorDataType.float32:
weight = weight.astype(TensorDataType.float32)
@@ -366,7 +372,8 @@
         # weights are reshaped from [a1, r, a2] to [a1, r//gs, gs, a2]
         weight, reduction_axes = reshape_weight_for_grouped_quantization(weight, reduction_axes, group_size)

-    if precomputed_zero_point is None or precomputed_zero_point is None:
+    scale, zero_point = None, None
+    if precomputed_scale is None or (is_asym and precomputed_zero_point is None):
         scale, zero_point = calculate_integer_quantization_params(weight, reduction_axes, config)
     if precomputed_scale is not None:
         scale = precomputed_scale
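
The guard added in the first hunk of this file is an exclusive-or on presence: (a is None) != (b is None) holds exactly when one of the two parameters is supplied without the other. Below is a standalone sketch of the same check and its accepted and rejected combinations; check_precomputed is an illustrative name, not an NNCF function.

def check_precomputed(is_asym: bool, scale, zero_point) -> None:
    # True exactly when one of scale/zero_point is given without the other.
    if is_asym and (scale is None) != (zero_point is None):
        raise ValueError(
            "If precomputed quantization parameters are provided, both scale and zero point are required "
            "for asymmetric quantization."
        )

check_precomputed(True, None, None)   # ok: both parameters will be computed
check_precomputed(True, 0.1, 8)       # ok: both supplied, nothing is recomputed
check_precomputed(False, 0.1, None)   # ok: symmetric modes carry no zero point
# check_precomputed(True, 0.1, None)  # raises ValueError
# check_precomputed(True, None, 8)    # raises ValueError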

tests/openvino/native/quantization/test_weights_compression.py: 48 additions & 0 deletions
@@ -1074,6 +1074,54 @@ def test_compressed_weighs_range(mode, data):
     assert np.allclose(np.abs(compressed_weighs.data), np.abs(w.data))


+@pytest.mark.parametrize(
+    "configuration",
+    [
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), False, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), True, True, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), True, False, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_ASYM), False, True, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), False, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), True, True, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), True, False, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_ASYM), False, True, True),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_SYM), True, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT8_SYM), False, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_SYM), True, False, False),
+        (WeightCompressionConfig(CompressWeightsMode.INT4_SYM), False, False, False),
+    ],
+)
+def test_int_quantization_with_precomputed_parameters(configuration):
+    config, precompute_scale, precompute_zero_point, raises = configuration
+    is_asym = config.mode in [CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT8_ASYM]
+
+    precomputed_scale, precomputed_zero_point = None, None
+    weight = Tensor(((np.arange(11) - 5) / 10).astype(np.float32)[:, None])
+    if precompute_scale:
+        precomputed_scale = Tensor(-((np.arange(11) - 5) / 100).astype(np.float32)[:, None])
+    if precompute_zero_point:
+        precomputed_zero_point = Tensor(np.arange(11).astype(np.int32)[:, None])
+
+    if raises:
+        with pytest.raises(ValueError) as exc_info:
+            _, scale, zero_point = do_int_quantization(weight, -1, config, precomputed_scale, precomputed_zero_point)
+        assert str(exc_info.value) == (
+            "If precomputed quantization parameters are provided, both scale and zero point "
+            "are required for asymmetric quantization."
+        )
+        return
+    else:
+        _, scale, zero_point = do_int_quantization(weight, -1, config, precomputed_scale, precomputed_zero_point)
+
+    if precompute_scale:
+        assert np.allclose(scale.data, precomputed_scale.data)
+    if is_asym:
+        if precompute_zero_point:
+            assert np.allclose(zero_point.data, precomputed_zero_point.data)
+    else:
+        assert zero_point is None
+
+
 @pytest.mark.parametrize("mode", INT4_NF4_MODES)
 def test_call_max_var_criterion_with_dataset_gptq_neg_group_size(mode):
     model = AWQMatmulModel().ov_model
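
To run only the new cases, pytest's substring matching on test names should work, for example: pytest tests/openvino/native/quantization/test_weights_compression.py -k precomputed_parameters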
