diff --git a/nncf/experimental/common/tensor_statistics/collectors.py b/nncf/experimental/common/tensor_statistics/collectors.py index c918a985ca4..3655fffe5d6 100644 --- a/nncf/experimental/common/tensor_statistics/collectors.py +++ b/nncf/experimental/common/tensor_statistics/collectors.py @@ -145,10 +145,21 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: :param x: Tensor to register. """ - @abstractmethod def aggregate(self) -> Any: """ Aggregates collected tensors and returns aggregated result. + In case no tensors were collected returns None. + + :return: Aggregated result. + """ + if self._collected_samples: + return self._aggregate_impl() + return None + + @abstractmethod + def _aggregate_impl(self) -> Any: + """ + Aggregates collected tensors and returns aggregated result. :return: Aggregated result. """ @@ -503,7 +514,7 @@ def __init__(self, num_samples: Optional[int]): def _register_reduced_input_impl(self, x: TensorType) -> None: self._container.append(x.tensor) - def aggregate(self): + def _aggregate_impl(self): return self._container @@ -514,7 +525,7 @@ def __init__(self): def _register_reduced_input_impl(self, x: TensorType) -> None: self._container = x - def aggregate(self): + def _aggregate_impl(self): return self._container.shape @@ -525,7 +536,7 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: else: self._container = self._tensor_processor.min(x, self._container) - def aggregate(self): + def _aggregate_impl(self): return self._container.tensor @@ -536,7 +547,7 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: else: self._container = self._tensor_processor.max(x, self._container) - def aggregate(self): + def _aggregate_impl(self): return self._container.tensor @@ -555,19 +566,19 @@ def _register_reduced_input_impl(self, x: TensorType) -> None: else: self._container.append(x) - def _aggregate(self, fn): + def _offline_aggregation_impl(self, fn): stacked_val = self._tensor_processor.stack(self._container) return fn(stacked_val, axis=0, keepdims=False).tensor class MeanAggregator(OfflineAggregatorBase): - def aggregate(self): - return self._aggregate(self._tensor_processor.mean) + def _aggregate_impl(self): + return self._offline_aggregation_impl(self._tensor_processor.mean) class MedianAggregator(OfflineAggregatorBase): - def aggregate(self): - return self._aggregate(self._tensor_processor.median) + def _aggregate_impl(self): + return self._offline_aggregation_impl(self._tensor_processor.median) class NoOutliersAggregatorBase(OfflineAggregatorBase, ABC): @@ -582,7 +593,7 @@ def __init__( super().__init__(tensor_processor, use_per_sample_stats, num_samples, window_size) self._quantile = quantile - def _aggregate(self, fn) -> List[NNCFTensor]: + def _offline_aggregation_impl(self, fn) -> List[NNCFTensor]: stacked_val = self._tensor_processor.stack(self._container) result = self._tensor_processor.no_outliers_map(stacked_val, fn, axis=0, alpha=self._quantile) return result.tensor @@ -595,13 +606,13 @@ def __hash__(self) -> int: class MeanNoOutliersAggregator(NoOutliersAggregatorBase): - def aggregate(self) -> Any: - return self._aggregate(self._tensor_processor.masked_mean) + def _aggregate_impl(self) -> Any: + return self._offline_aggregation_impl(self._tensor_processor.masked_mean) class MedianNoOutliersAggregator(NoOutliersAggregatorBase): - def aggregate(self) -> Any: - return self._aggregate(self._tensor_processor.masked_median) + def _aggregate_impl(self) -> Any: + return self._offline_aggregation_impl(self._tensor_processor.masked_median) AGGREGATORS_MAP = { diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index ed545f9512a..db956997750 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -114,6 +114,9 @@ def filter_func(point: StatisticPoint) -> bool: ) assert len(tensor_collectors) == 1 stat = tensor_collectors[0].get_statistics() + if stat.min_values is None or stat.max_values is None: + continue + conv_in_cont = ConvParamsContainer(conv_in, model, graph, self._backend_entity) conv_out_cont = ConvParamsContainer(conv_out, model, graph, self._backend_entity) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index b62c0563783..fa4f8e71ffe 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -628,7 +628,10 @@ def filter_func(point: StatisticPoint) -> bool: for tensor_collector in statistic_points.get_algo_statistics_for_node( target_node_name, filter_func, self._algorithm_key ): - group_statistics.append(tensor_collector.get_statistics()) + statistics = tensor_collector.get_statistics() + if statistics.min_values is None or statistics.max_values is None: + raise RuntimeError(f"Statistics were not collected for the node {target_node_name}") + group_statistics.append(statistics) unified_values = self._backend_entity.unify_statistics(group_statistics) for quantization_target_point in unified_scale_group: @@ -661,6 +664,8 @@ def filter_func(point: StatisticPoint) -> bool: half_range = quantization_target_point in quantization_points_overflow_fix narrow_range = get_quantizer_narrow_range(qconfig, quant_group) statistics = tensor_collector.get_statistics() + if statistics.min_values is None or statistics.max_values is None: + raise RuntimeError(f"Statistics were not collected for the node {target_node_name}") parameters = calculate_quantizer_parameters(statistics, qconfig, quant_group, narrow_range, half_range) command = self._backend_entity.create_quantizer_insertion_command( graph, quantization_target_point, qconfig, parameters diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 0815af828b0..670283a75f0 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -109,9 +109,13 @@ def apply( for group_id, nodes in tqdm(node_groups.items(), desc="Applying Smooth Quant"): best_ratio = 0.0 + empty_statistic = False for node_to_smooth in nodes: source_node, port_id = group_id activations_value = self._get_statistics_for_node(statistic_points, node_to_smooth.node_name, port_id) + if any(val is None for val in activations_value): + empty_statistic = True + break activations_value = self._backend_entity.clip_statistics(activations_value) weights_port = self._backend_entity.get_weight_tensor_port_id(node_to_smooth) @@ -126,6 +130,9 @@ def apply( best_ratio = ratio best_scale = deepcopy(scales) + if empty_statistic: + continue + activation_scales = self._backend_entity.calculate_activation_scale(best_scale, nodes) weight_scales = self._backend_entity.calculate_weight_scale(best_scale, nodes) diff --git a/tests/experimental/common/test_statistic_collector.py b/tests/experimental/common/test_statistic_collector.py index 7c6944aa5e6..9346b176bd6 100644 --- a/tests/experimental/common/test_statistic_collector.py +++ b/tests/experimental/common/test_statistic_collector.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from abc import abstractmethod from typing import List, Optional import numpy as np @@ -53,7 +54,7 @@ def __init__(self, num_samples: Optional[int]): def _register_reduced_input_impl(self, x: TensorType): return self._container.append(x) - def aggregate(self): + def _aggregate_impl(self): return self._container[0] @@ -269,3 +270,46 @@ def test_multiple_branch_reducer(): assert len(ref_stats) == len(stats) for key, value in ref_stats.items(): assert value == stats[key] + + +class TemplateTestStatisticCollector: + @abstractmethod + def get_nncf_tensor_cls(self): + pass + + @pytest.mark.parametrize("inplace", [False, True]) + @pytest.mark.parametrize("any_not_empty", [False, True]) + def test_empty_tensors_register(self, inplace, any_not_empty): + collector = TensorCollector() + reducer = DummyTensorReducer("Dummy", inplace) + aggregator = DummyTensorAggregator(5) + collector.register_statistic_branch("A", reducer, aggregator) + input_name = "input_name" + full_inputs = TensorCollector.get_tensor_collector_inputs( + {input_name: self.get_nncf_tensor_cls()(np.array([100]))}, [(hash(reducer), [input_name])] + ) + empty_inputs = TensorCollector.get_tensor_collector_inputs( + {input_name: self.get_nncf_tensor_cls()(np.array([]))}, [(hash(reducer), [input_name])] + ) + + stats = collector.get_statistics() + assert len(stats) == 1 + assert stats["A"] is None + + inputs = [full_inputs, empty_inputs, full_inputs] if any_not_empty else [empty_inputs, empty_inputs] + for input_ in inputs: + collector.register_inputs(input_) + + if any_not_empty: + assert len(aggregator._container) == 2 + assert aggregator._collected_samples == 2 + stats = collector.get_statistics() + assert len(stats) == 1 + assert stats["A"] == self.get_nncf_tensor_cls()([100]) + return + + assert len(aggregator._container) == 0 + assert aggregator._collected_samples == 0 + stats = collector.get_statistics() + assert len(stats) == 1 + assert stats["A"] is None diff --git a/tests/openvino/native/test_smooth_quant.py b/tests/openvino/native/test_smooth_quant.py index 339422b5633..e16cbb03077 100644 --- a/tests/openvino/native/test_smooth_quant.py +++ b/tests/openvino/native/test_smooth_quant.py @@ -16,6 +16,7 @@ import openvino.runtime as ov import torch +from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype from nncf.quantization.algorithms.smooth_quant.openvino_backend import OVSmoothQuantAlgoBackend from tests.post_training.test_templates.test_smooth_quant import TemplateTestSQAlgorithm from tests.shared.command import Command @@ -62,3 +63,7 @@ def check_scales(model: ov.Model, reference_values: Dict[str, np.ndarray]) -> No ref_value = np.array(ref_value) assert value.shape == ref_value.shape assert np.all(np.isclose(value, ref_value, atol=0.0001)), f"{value} != {ref_value}" + + @staticmethod + def get_matmul_metatype(): + return OVMatMulMetatype diff --git a/tests/openvino/native/test_statistic_collector.py b/tests/openvino/native/test_statistic_collector.py index c9b325db56b..d4dca76f9c5 100644 --- a/tests/openvino/native/test_statistic_collector.py +++ b/tests/openvino/native/test_statistic_collector.py @@ -9,49 +9,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np - -from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.tensor import OVNNCFTensor -from tests.experimental.common.test_statistic_collector import DummyTensorAggregator -from tests.experimental.common.test_statistic_collector import DummyTensorReducer - - -# pylint:disable=protected-access -def test_empty_tensors_register(): - collector = TensorCollector() - reducer = DummyTensorReducer("Dummy") - aggregator = DummyTensorAggregator(5) - collector.register_statistic_branch("A", reducer, aggregator) - input_name = "input_name" - full_inputs = TensorCollector.get_tensor_collector_inputs( - {input_name: OVNNCFTensor(np.array([100]))}, [(hash(reducer), [input_name])] - ) - empty_inputs = TensorCollector.get_tensor_collector_inputs( - {input_name: OVNNCFTensor(np.array([]))}, [(hash(reducer), [input_name])] - ) - - for inputs in [full_inputs, empty_inputs, full_inputs]: - collector.register_inputs(inputs) - assert len(aggregator._container) == 2 - assert aggregator._collected_samples == 2 - +from tests.experimental.common.test_statistic_collector import TemplateTestStatisticCollector -# pylint:disable=protected-access -def test_empty_inplace_tensors_register(): - collector = TensorCollector() - inplace_reducer = DummyTensorReducer("Dummy", True) - aggregator = DummyTensorAggregator(5) - collector.register_statistic_branch("A", inplace_reducer, aggregator) - input_name = "input_name" - full_inputs = TensorCollector.get_tensor_collector_inputs( - {input_name: OVNNCFTensor(np.array([100]))}, [(hash(inplace_reducer), [input_name])] - ) - empty_inputs = TensorCollector.get_tensor_collector_inputs( - {input_name: OVNNCFTensor(np.array([]))}, [(hash(inplace_reducer), [input_name])] - ) - for inputs in [full_inputs, empty_inputs, full_inputs]: - collector.register_inputs(inputs) - assert len(aggregator._container) == 2 - assert aggregator._collected_samples == 2 +class TestOVStatisticCollector(TemplateTestStatisticCollector): + def get_nncf_tensor_cls(self): + return OVNNCFTensor diff --git a/tests/post_training/test_templates/helpers.py b/tests/post_training/test_templates/helpers.py index 48b39835a8b..057cc5b52c9 100644 --- a/tests/post_training/test_templates/helpers.py +++ b/tests/post_training/test_templates/helpers.py @@ -151,6 +151,20 @@ def forward(self, x): return x_1, x_2 +class NonZeroLinearModel(nn.Module): + INPUT_SIZE = [10] + + def forward(self, x): + zeros = (x > torch.inf).float() + empty = torch.nonzero(zeros).reshape((-1, 1, 1)).float() + y = torch.matmul(empty, torch.ones((1, 5))) + y += 5 + y = torch.cat((torch.ones((1, 10)), y.reshape(1, -1)), dim=1) + y = torch.matmul(y, torch.ones(10, 10)) + y += 5 + return y + + class SplittedModel(nn.Module): INPUT_SIZE = [1, 3, 28, 28] diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index 6279bd1f0e8..2b063e7d90f 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -287,9 +287,11 @@ def _get_nncf_graph(self, num_biases: int) -> NNCFGraph: constant_metatype=self.get_constant_metatype(), ).nncf_graph + @pytest.mark.parametrize("empty_statistics", [False, True]) @pytest.mark.parametrize("num_biases", [0, 1, 2]) # pylint: disable=too-many-statements - def test_transformation_layout(self, num_biases, mocker): + # pylint: disable=too-many-branches + def test_transformation_layout(self, empty_statistics, num_biases, mocker): mocked_transformer = mocker.MagicMock() self.mock_model_transformer_factory(mocker, mocked_transformer) @@ -325,9 +327,12 @@ def f(*args, **kwargs): algorithm = ChannelAlignment() tensor_collector = TensorCollector() - tensor_collector.get_statistics = get_constant_lambda( - TestTensorStats(np.array([-1], dtype=np.int32), np.array([2], dtype=np.int32)) - ) + if empty_statistics: + stat_value = None, None + else: + stat_value = (np.array([-1], dtype=np.int32), np.array([2], dtype=np.int32)) + + tensor_collector.get_statistics = get_constant_lambda(TestTensorStats(*stat_value)) statistic_points.add_statistic_point(StatisticPoint(target_point, tensor_collector, algorithm._algorithm_key)) class MockBackend(backend_cls): @@ -357,6 +362,14 @@ class MockBackend(backend_cls): ) algorithm.apply(None, nncf_graph, statistic_points) + if empty_statistics: + assert algorithm._align_means.call_count == 0 + assert algorithm._align_scales.call_count == 0 + mocked_transformer.transform.assert_called_once() + arg = mocked_transformer.transform.call_args.args[0] + assert len(arg.transformations) == 0 + return + align_means_called = 1 if num_biases == 2 else 0 assert algorithm._align_means.call_count == align_means_called if align_means_called: diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index eb91aa770eb..a2ec340c2ff 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -23,6 +23,9 @@ from nncf.common.quantization.structs import QuantizationPreset from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import QuantizerGroup +from nncf.common.tensor_statistics.statistic_point import StatisticPoint +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer +from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic from nncf.parameters import ModelType from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix @@ -296,3 +299,37 @@ def test_validate_scope(self, test_params, validate_scopes): algo._get_ignored_names(nncf_graph, inference_nncf_graph, ignored_patterns) else: algo._get_ignored_names(nncf_graph, inference_nncf_graph, ignored_patterns) + + @pytest.mark.parametrize("mode", ["target_point", "unified_scales"]) + def test_empty_statistics(self, mode, mocker): + algo = MinMaxQuantization() + target_point = self.target_point(TargetType.PRE_LAYER_OPERATION, "A", 0) + stat_points = StatisticPointsContainer() + + class DummyMinMaxTensorStatistic(MinMaxTensorStatistic): + def tensor_eq(self): + return True + + class EmptyTensorCollector: + def get_statistics(self): + return DummyMinMaxTensorStatistic(None, None) + + dummy_tp = {target_point: QuantizerConfig()} + if mode == "target_point": + dummy_tps = (dummy_tp, {}) + else: + dummy_tps = ({}, ((target_point,),)) + stat_points.add_statistic_point(StatisticPoint(target_point, EmptyTensorCollector(), algo._algorithm_key)) + mocker.patch("nncf.common.factory.ModelTransformerFactory.create", return_value=mocker.MagicMock()) + mocker.patch( + "nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization._get_quantization_target_points", + return_value=dummy_tps, + ) + mocker.patch( + "nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization._get_quantization_points_overflow_fix", + return_value=mocker.MagicMock(), + ) + with pytest.raises(RuntimeError) as exc_info: + algo.apply(None, None, stat_points) + + assert str(exc_info.value) == "Statistics were not collected for the node A" diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py index 6bddc8b9acb..aa094c1024d 100644 --- a/tests/post_training/test_templates/test_smooth_quant.py +++ b/tests/post_training/test_templates/test_smooth_quant.py @@ -15,14 +15,17 @@ import pytest from nncf.common.factory import NNCFGraphFactory +from nncf.common.factory import StatisticsAggregatorFactory from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.parameters import ModelType from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend from tests.post_training.test_templates.helpers import LinearModel +from tests.post_training.test_templates.helpers import NonZeroLinearModel from tests.post_training.test_templates.helpers import get_static_dataset TModel = TypeVar("TModel") @@ -62,6 +65,13 @@ def get_backend() -> SmoothQuantAlgoBackend: Returns backend-specific SmoothQuantAlgoBackend. """ + @staticmethod + @abstractmethod + def get_matmul_metatype(): + """ + Returns backend-specific MatMul metatype + """ + @staticmethod def get_quantization_algorithm(): return PostTrainingQuantization( @@ -112,3 +122,28 @@ def test_smooth_quant(self): assert isinstance(reducer, AbsMaxReducer) assert reducer.inplace == inplace_type assert reducer._reduction_shape == reduction_shape + + def test_empty_stats(self, mocker, tmpdir): + model_cls = NonZeroLinearModel + model = self.backend_specific_model(model_cls(), tmpdir) + dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) + + graph = NNCFGraphFactory.create(model) + algo = SmoothQuant(subset_size=1, inplace_statistics=False) + algo_statistic_points = algo.get_statistic_points(model, graph) + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + statistics_aggregator.register_statistic_points(algo_statistic_points) + statistics_aggregator.collect_statistics(model, graph) + + mocked_transformer = mocker.MagicMock() + mocker.patch("nncf.common.factory.ModelTransformerFactory.create", return_value=mocked_transformer) + algo.apply(model, graph, algo_statistic_points) + + mocked_transformer.transform.assert_called_once() + arg = mocked_transformer.transform.call_args.args[0] + assert len(arg.transformations) == 2 + + mm_metatype = self.get_matmul_metatype() + matmuls = [node for node in graph.topological_sort() if node.metatype == mm_metatype] + for transformation in arg.transformations: + assert transformation.target_point.target_node_name != matmuls[0].node_name