From ec5c0dd0ee5a31fa13cc25a50d304acbb2c32f81 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Wed, 13 Sep 2023 17:02:19 +0100 Subject: [PATCH 1/7] Add Pipeline and StepwisePipeline classes --- .../algorithms/post_training/algorithm.py | 215 ------------------ .../post_training => pipelines}/__init__.py | 0 nncf/quantization/pipelines/pipeline.py | 38 ++++ .../pipelines/post_training/__init__.py | 10 + .../pipelines/post_training/pipeline.py | 120 ++++++++++ .../pipelines/stepwise_pipeline.py | 175 ++++++++++++++ 6 files changed, 343 insertions(+), 215 deletions(-) delete mode 100644 nncf/quantization/algorithms/post_training/algorithm.py rename nncf/quantization/{algorithms/post_training => pipelines}/__init__.py (100%) create mode 100644 nncf/quantization/pipelines/pipeline.py create mode 100644 nncf/quantization/pipelines/post_training/__init__.py create mode 100644 nncf/quantization/pipelines/post_training/pipeline.py create mode 100644 nncf/quantization/pipelines/stepwise_pipeline.py diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py deleted file mode 100644 index d6e6b40de80..00000000000 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Callable, Dict, List, Optional, TypeVar - -from nncf import Dataset -from nncf.common.deprecation import warning_deprecated -from nncf.common.factory import NNCFGraphFactory -from nncf.common.factory import StatisticsAggregatorFactory -from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger -from nncf.common.quantization.structs import QuantizationPreset -from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer -from nncf.common.utils.backend import BackendType -from nncf.common.utils.backend import copy_model -from nncf.common.utils.backend import get_backend -from nncf.parameters import ModelType -from nncf.parameters import TargetDevice -from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.algorithms.algorithm import Algorithm -from nncf.quantization.algorithms.bias_correction.algorithm import BIAS_CORRECTION_THRESHOLD -from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection -from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment -from nncf.quantization.algorithms.fast_bias_correction.algorithm import FAST_BIAS_CORRECTION_THRESHOLD -from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant -from nncf.scopes import IgnoredScope - -TModel = TypeVar("TModel") -TPass = Callable[[TModel], TModel] - - -class PostTrainingQuantization(Algorithm): - """ - Implements Post-Training Quantization algorithm, which basically includes: - 1) ChannelAlignment - 2) MinMaxQuantization - 3) FastBiasCorrection or BiasCorrection - """ - - def __init__( - self, - preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, - target_device: TargetDevice = TargetDevice.ANY, - subset_size: int = 300, - fast_bias_correction: bool = True, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - advanced_parameters: Optional[AdvancedQuantizationParameters] = None, - ): - """ - :param preset: A preset that controls the quantization mode - (symmetric and asymmetric). It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric - quantization of activations. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device. - :param subset_size: Size of a subset to calculate activations - statistics used for quantization. - :param fast_bias_correction: Setting this option to `False` enables a different - bias correction method which is more accurate, in general, and takes - more time but requires less memory. - :param model_type: Model type is needed to specify additional patterns - in the model. Supported only `transformer` now. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. 
- :param advanced_parameters: Advanced quantization parameters for - fine-tuning the quantization algorithm - """ - super().__init__() - self.algorithms = [] - self.first_stage_algorithms: List[Algorithm] = [] - - if target_device is TargetDevice.VPU: - warning_deprecated("VPU device is deprecated and will no longer be supported in the future.") - - if advanced_parameters is None: - advanced_parameters = AdvancedQuantizationParameters() - - if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0: - smooth_quant_algorithm = SmoothQuant( - subset_size=subset_size, - inplace_statistics=advanced_parameters.inplace_statistics, - alpha=advanced_parameters.smooth_quant_alpha, - ) - self.first_stage_algorithms.append(smooth_quant_algorithm) - - if not advanced_parameters.disable_channel_alignment: - channel_alignment = ChannelAlignment( - subset_size=subset_size, - inplace_statistics=advanced_parameters.inplace_statistics, - ) - self.first_stage_algorithms.append(channel_alignment) - - min_max_quantization = MinMaxQuantization( - preset=preset, - target_device=target_device, - subset_size=subset_size, - model_type=model_type, - ignored_scope=ignored_scope, - overflow_fix=advanced_parameters.overflow_fix, - quantize_outputs=advanced_parameters.quantize_outputs, - inplace_statistics=advanced_parameters.inplace_statistics, - activations_quantization_params=advanced_parameters.activations_quantization_params, - weights_quantization_params=advanced_parameters.weights_quantization_params, - activations_range_estimator_params=advanced_parameters.activations_range_estimator_params, - weights_range_estimator_params=advanced_parameters.weights_range_estimator_params, - backend_params=advanced_parameters.backend_params, - ) - - self.algorithms.append(min_max_quantization) - - if advanced_parameters.disable_bias_correction: - return - - bias_correction_params = advanced_parameters.bias_correction_params - if fast_bias_correction: - threshold = FAST_BIAS_CORRECTION_THRESHOLD - if bias_correction_params.threshold is not None: - threshold = bias_correction_params.threshold - bias_correction = FastBiasCorrection( - subset_size=subset_size, - threshold=threshold, - apply_for_all_nodes=bias_correction_params.apply_for_all_nodes, - inplace_statistics=advanced_parameters.inplace_statistics, - backend_params=advanced_parameters.backend_params, - ) - else: - threshold = BIAS_CORRECTION_THRESHOLD - if bias_correction_params.threshold is not None: - threshold = bias_correction_params.threshold - bias_correction_subset_size = max(int(subset_size * 0.2), 1) - bias_correction = BiasCorrection( - subset_size=bias_correction_subset_size, - threshold=threshold, - apply_for_all_nodes=bias_correction_params.apply_for_all_nodes, - inplace_statistics=advanced_parameters.inplace_statistics, - backend_params=advanced_parameters.backend_params, - ) - - self.algorithms.append(bias_correction) - - @property - def available_backends(self) -> Dict[str, BackendType]: - return - - def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: - if self.first_stage_algorithms: - raise NotImplementedError( - "Statistic points are not supported yet for SmoothQuant and ChannelAlignment algorithms." 
- ) - - output = StatisticPointsContainer() - for algorithm in self.algorithms: - for statistic_points in algorithm.get_statistic_points(model, graph).values(): - for statistic_point in statistic_points: - output.add_statistic_point(statistic_point) - return output - - def apply( - self, - model: TModel, - graph: NNCFGraph, - statistic_points: Optional[StatisticPointsContainer] = None, - dataset: Optional[Dataset] = None, - ) -> TModel: - modified_model = copy_model(model) - modified_model_graph = graph - backend = get_backend(modified_model) - - for algorithm in self.first_stage_algorithms: - if isinstance(algorithm, SmoothQuant) and backend != BackendType.OPENVINO: - nncf_logger.debug(f"{backend.name} does not support SmoothQuant algorithm yet.") - continue - - if isinstance(algorithm, ChannelAlignment) and backend != BackendType.OPENVINO: - nncf_logger.debug(f"{backend.name} does not support ChannelAlignment algorithm yet.") - continue - - statistics_aggregator = StatisticsAggregatorFactory.create(modified_model, dataset) - algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph) - statistics_aggregator.register_statistic_points(algo_statistic_points) - statistics_aggregator.collect_statistics(modified_model, modified_model_graph) - modified_model = algorithm.apply( - modified_model, modified_model_graph, statistics_aggregator.statistic_points - ) - modified_model_graph = NNCFGraphFactory.create(modified_model) - - if statistic_points is None: - statistics_aggregator = StatisticsAggregatorFactory.create(modified_model, dataset) - for algorithm in self.algorithms: - algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph) - statistics_aggregator.register_statistic_points(algo_statistic_points) - - statistics_aggregator.collect_statistics(modified_model, modified_model_graph) - statistic_points = statistics_aggregator.statistic_points - - for algorithm in self.algorithms[:-1]: - modified_model = algorithm.apply(modified_model, modified_model_graph, statistic_points) - modified_model_graph = NNCFGraphFactory.create(modified_model) - # building the model graph is not required after the last algorithm - modified_model = self.algorithms[-1].apply(modified_model, modified_model_graph, statistic_points) - - return modified_model diff --git a/nncf/quantization/algorithms/post_training/__init__.py b/nncf/quantization/pipelines/__init__.py similarity index 100% rename from nncf/quantization/algorithms/post_training/__init__.py rename to nncf/quantization/pipelines/__init__.py diff --git a/nncf/quantization/pipelines/pipeline.py b/nncf/quantization/pipelines/pipeline.py new file mode 100644 index 00000000000..770d89d34d7 --- /dev/null +++ b/nncf/quantization/pipelines/pipeline.py @@ -0,0 +1,38 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from abc import ABC +from abc import abstractmethod +from typing import TypeVar + +from nncf.data.dataset import Dataset + +TModel = TypeVar("TModel") + + +class Pipeline(ABC): + """ + A base class for creating pipelines that apply algorithms to a model. + + This abstract class serves as an interface for creating custom model + processing pipelines that encapsulate a series of algorithms to be + applied to a model using a provided dataset. + """ + + @abstractmethod + def run(self, model: TModel, dataset: Dataset) -> TModel: + """ + Abstract method that defines the sequence of algorithms to be + applied to the provided model using the provided dataset. + + :param model: A model to which pipeline will be applied. + :param dataset: A dataset that holds the data items for algorithms. + """ diff --git a/nncf/quantization/pipelines/post_training/__init__.py b/nncf/quantization/pipelines/post_training/__init__.py new file mode 100644 index 00000000000..9b29b47534a --- /dev/null +++ b/nncf/quantization/pipelines/post_training/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nncf/quantization/pipelines/post_training/pipeline.py b/nncf/quantization/pipelines/post_training/pipeline.py new file mode 100644 index 00000000000..01094ddf848 --- /dev/null +++ b/nncf/quantization/pipelines/post_training/pipeline.py @@ -0,0 +1,120 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional, TypeVar + +from nncf.common.deprecation import warning_deprecated +from nncf.common.quantization.structs import QuantizationPreset +from nncf.parameters import ModelType +from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters +from nncf.quantization.algorithms.bias_correction.algorithm import BIAS_CORRECTION_THRESHOLD +from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection +from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment +from nncf.quantization.algorithms.fast_bias_correction.algorithm import FAST_BIAS_CORRECTION_THRESHOLD +from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization +from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant +from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline +from nncf.scopes import IgnoredScope + +TModel = TypeVar("TModel") + + +class PostTrainingQuantization(StepwisePipeline): + """ """ + + def __init__( + self, + preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + advanced_parameters: Optional[AdvancedQuantizationParameters] = None, + ): + """ """ + if target_device is TargetDevice.VPU: + warning_deprecated("VPU device is deprecated and will no longer be supported in the future.") + + if advanced_parameters is None: + advanced_parameters = AdvancedQuantizationParameters() + + # Build the post-training quantization pipeline. + pipeline_steps = [] + + # Add the `SmoothQuant` algorithm as the first step of the pipeline. + # It is added only for `ModelType.TRANSFORMER`. + if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0: + pipeline_steps.append( + [ + SmoothQuant( + subset_size, advanced_parameters.inplace_statistics, advanced_parameters.smooth_quant_alpha + ) + ] + ) + + # Add the `ChannelAlignment` algorithm as the second step of the pipeline. + if not advanced_parameters.disable_channel_alignment: + pipeline_steps.append([ChannelAlignment(subset_size, advanced_parameters.inplace_statistics)]) + + # Add the `MinMaxQuantization` algorithm as the third step of the pipeline. + pipeline_steps.append( + [ + MinMaxQuantization( + preset, + target_device, + subset_size, + model_type, + ignored_scope, + advanced_parameters.overflow_fix, + advanced_parameters.quantize_outputs, + advanced_parameters.inplace_statistics, + advanced_parameters.activations_quantization_params, + advanced_parameters.weights_quantization_params, + advanced_parameters.activations_range_estimator_params, + advanced_parameters.weights_range_estimator_params, + advanced_parameters.backend_params, + ) + ] + ) + + if advanced_parameters.disable_bias_correction: + return + + # Add the `FastBiasCorrection` or `BiasCorrection` as additional algorithm + # inside the third step of the pipeline. It is added after `MinMaxQuantization` + # algorithm. 
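+        # Note: `BiasCorrection` is more accurate than `FastBiasCorrection` in general,
+        # but takes more time, which is why it runs on a reduced calibration subset
+        # (20% of `subset_size`, but at least one data item).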
+        bias_correction_params = advanced_parameters.bias_correction_params
+        if fast_bias_correction:
+            threshold = FAST_BIAS_CORRECTION_THRESHOLD
+            bias_correction_subset_size = subset_size
+            bias_correction_cls = FastBiasCorrection
+        else:
+            threshold = BIAS_CORRECTION_THRESHOLD
+            bias_correction_subset_size = max(int(subset_size * 0.2), 1)
+            bias_correction_cls = BiasCorrection
+
+        if bias_correction_params.threshold is not None:
+            threshold = bias_correction_params.threshold
+
+        pipeline_steps[-1].append(
+            bias_correction_cls(
+                bias_correction_subset_size,
+                threshold,
+                bias_correction_params.apply_for_all_nodes,
+                advanced_parameters.inplace_statistics,
+                advanced_parameters.backend_params,
+            )
+        )
+
+        super().__init__(pipeline_steps)
diff --git a/nncf/quantization/pipelines/stepwise_pipeline.py b/nncf/quantization/pipelines/stepwise_pipeline.py
new file mode 100644
index 00000000000..c83de92f903
--- /dev/null
+++ b/nncf/quantization/pipelines/stepwise_pipeline.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, Optional, TypeVar
+
+from nncf.common.factory import NNCFGraphFactory
+from nncf.common.factory import StatisticsAggregatorFactory
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
+from nncf.data.dataset import Dataset
+from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.quantization.pipelines.pipeline import Pipeline
+
+TModel = TypeVar("TModel")
+PipelineStep = List[Algorithm]
+
+
+def get_statistic_points(pipeline_step: PipelineStep, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
+    """
+    Returns statistic points required by all algorithms in the provided pipeline step.
+
+    :param pipeline_step: A sequence of algorithms that forms a pipeline step.
+    :param model: A model for which the statistic points are collected.
+    :param graph: A graph associated with the model.
+    :return: A container with the combined statistic points of all algorithms in the step.
+    """
+    container = StatisticPointsContainer()
+    for algorithm in pipeline_step:
+        for statistic_points in algorithm.get_statistic_points(model, graph).values():
+            for statistic_point in statistic_points:
+                container.add_statistic_point(statistic_point)
+
+    return container
+
+
+def collect_statistics(
+    statistic_points: StatisticPointsContainer, model: TModel, graph: NNCFGraph, dataset: Dataset
+) -> StatisticPointsContainer:
+    """
+    Collects statistics for the provided statistic points.
+
+    :param statistic_points: Statistic points for which the statistics are collected.
+    :param model: A model for which the statistics are collected.
+    :param graph: A graph associated with the model.
+    :param dataset: A dataset that holds the data items to compute the statistics.
+    :return: A container with the collected statistic points.
+    """
+    statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
+    statistics_aggregator.register_statistic_points(statistic_points)
+    statistics_aggregator.collect_statistics(model, graph)
+
+    return statistics_aggregator.statistic_points
+
+
+class StepwisePipeline(Pipeline):
+    """
+    A class for creating sequential model processing pipelines with distinct steps.
+
+    This class extends the base `Pipeline` class to provide access to each distinct
+    step of the pipeline. Each pipeline step is a sequence of `Algorithm` class
+    instances whose statistic points are combined and collected using the model
+    that was obtained after the previous pipeline step.
The collected statistic points are used by all algorithms in this step.
+    """
+
+    def __init__(self, pipeline_steps: List[PipelineStep]):
+        """
+        :param pipeline_steps: A sequence of pipeline steps to be executed in order.
+        """
+        self._pipeline_steps = pipeline_steps
+
+    @property
+    def pipeline_steps(self) -> List[PipelineStep]:
+        """
+        Property that defines the sequence of distinct pipeline steps to
+        be executed in order.
+
+        :return: A sequence of pipeline steps to be executed in order.
+        """
+        return self._pipeline_steps
+
+    def run(self, model: TModel, dataset: Dataset) -> TModel:
+        """
+        Executes all pipeline steps on the provided model in order.
+
+        :param model: A model to which pipeline will be applied.
+        :param dataset: A dataset that holds the data items for algorithms.
+        :return: The updated model after executing the entire pipeline.
+        """
+        return run_pipeline_from_step(self, model, dataset)
+
+
+def run_pipeline_step(
+    pipeline_step: PipelineStep,
+    pipeline_step_statistics: StatisticPointsContainer,
+    model: TModel,
+    graph: NNCFGraph,
+) -> TModel:
+    """
+    Executes a provided pipeline step on the provided model.
+
+    :param pipeline_step: A sequence of algorithms representing a pipeline step.
+    :param pipeline_step_statistics: Statistics required to execute a pipeline step.
+    :param model: A model to which a pipeline step will be applied.
+    :param graph: A graph associated with a model.
+    :return: The updated model after executing the pipeline step.
+    """
+    current_model = model
+    current_graph = graph
+
+    for algorithm in pipeline_step[:-1]:
+        current_model = algorithm.apply(current_model, current_graph, pipeline_step_statistics)
+        current_graph = NNCFGraphFactory.create(current_model)
+    # Building the graph is not required after the last algorithm in the step
+    current_model = pipeline_step[-1].apply(current_model, current_graph, pipeline_step_statistics)
+
+    return current_model
+
+
+def run_pipeline_from_step(
+    pipeline: StepwisePipeline,
+    model: TModel,
+    dataset: Dataset,
+    graph: Optional[NNCFGraph],
+    start_step_index: int = 0,
+    step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None,
+) -> TModel:
+    """
+    Executes the pipeline from the specified pipeline step to the end.
+
+    :param pipeline: A pipeline, a part of which should be executed.
+    :param model: This is the model after the (start_step_index - 1)-th pipeline
+        step, or the initial model if start_step_index is 0.
+    :param dataset: A dataset that holds the data items for pipeline steps.
+    :param graph: A graph associated with a model.
+    :param start_step_index: Zero-based pipeline step index from which the pipeline
+        should be executed.
+    :param step_index_to_statistics: A mapping from pipeline step index to the statistics
+        required to execute that pipeline step.
+    :return: The updated model after executing the pipeline from the specified pipeline
+        step to the end.
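+
+    If `graph` is None, it will be built from the provided model. Statistics for steps
+    that are missing from `step_index_to_statistics` are collected from `dataset` before
+    the step is executed.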
+ """ + if step_index_to_statistics is None: + step_index_to_statistics = {} + + # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step + step_model = model + step_graph = graph + step_index = start_step_index + + for pipeline_step in pipeline.pipeline_steps[start_step_index:]: + # Create graph required to run current pipeline step + if step_graph is None: + step_graph = NNCFGraphFactory.create(step_model) + + # Collect statistics required to run current pipeline step + step_statistics = step_index_to_statistics.get(step_index) + if step_statistics is None: + statistic_points = get_statistic_points(pipeline_step, step_model, step_graph) + step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) + + # Run current pipeline step + step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph) + + step_graph = None # We should rebuild the graph for the next pipeline step + step_index += 1 + + return step_model From 877e6cb512d47f03d7deb5e9f24b3d14e63072d3 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Fri, 15 Sep 2023 09:57:52 +0100 Subject: [PATCH 2/7] Add the implementation of the HyperparameterTuner class derived from the Pipeline class --- .../hyperparameter_tuner/__init__.py | 0 .../hyperparameter_tuner/param_grid.py | 0 .../hyperparameter_tuner/pipeline.py} | 159 ++++++++++++------ .../pipelines/stepwise_pipeline.py | 13 +- nncf/quantization/quantize_model.py | 2 +- 5 files changed, 114 insertions(+), 60 deletions(-) rename nncf/quantization/{algorithms => pipelines}/hyperparameter_tuner/__init__.py (100%) rename nncf/quantization/{algorithms => pipelines}/hyperparameter_tuner/param_grid.py (100%) rename nncf/quantization/{algorithms/hyperparameter_tuner/algorithm.py => pipelines/hyperparameter_tuner/pipeline.py} (70%) diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/__init__.py b/nncf/quantization/pipelines/hyperparameter_tuner/__init__.py similarity index 100% rename from nncf/quantization/algorithms/hyperparameter_tuner/__init__.py rename to nncf/quantization/pipelines/hyperparameter_tuner/__init__.py diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py similarity index 100% rename from nncf/quantization/algorithms/hyperparameter_tuner/param_grid.py rename to nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py diff --git a/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py similarity index 70% rename from nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py rename to nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py index fba7b984278..781f206b9a7 100644 --- a/nncf/quantization/algorithms/hyperparameter_tuner/algorithm.py +++ b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py @@ -17,9 +17,9 @@ from typing import Any, Callable, Dict, Iterable, List, Tuple, Type, TypeVar, Union from nncf.common.factory import NNCFGraphFactory -from nncf.common.factory import StatisticsAggregatorFactory from nncf.common.graph.graph import NNCFGraph from nncf.common.logging import nncf_logger +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import get_backend from nncf.common.utils.timer import timer from nncf.data.dataset import Dataset @@ -27,7 +27,12 @@ from nncf.quantization.algorithms.accuracy_control.evaluator 
import MetricResults
 from nncf.quantization.algorithms.accuracy_control.rank_functions import create_normalized_mse_func
 from nncf.quantization.algorithms.accuracy_control.subset_selection import select_subset
-from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.quantization.pipelines.pipeline import Pipeline
+from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline
+from nncf.quantization.pipelines.stepwise_pipeline import collect_statistics
+from nncf.quantization.pipelines.stepwise_pipeline import get_statistic_points
+from nncf.quantization.pipelines.stepwise_pipeline import run_pipeline_from_step
+from nncf.quantization.pipelines.stepwise_pipeline import run_pipeline_step
 
 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
@@ -175,7 +180,7 @@ def find_best_combination(
     return best_combination_key
 
 
-class HyperparameterTuner:
+class HyperparameterTuner(Pipeline):
     """
     This algorithm is used to find a best combination of parameters from `param_grid`.
 
@@ -186,7 +191,7 @@ class HyperparameterTuner:
         "param_name": [0.1, 0.2],
     }
 
-    The parameters names should be same as in `algorithm_cls.__init__()` method.
+    The parameter names should be the same as in the `pipeline_cls.__init__()` method.
 
     In case when "param_name" parameter is a dataclass object there is a way to specify
     settings to try for his fields using marker ":"
@@ -214,9 +219,9 @@ class HyperparameterTuner:
 
     def __init__(
        self,
-        algorithm_cls: Type[Algorithm],
+        pipeline_cls: Type[StepwisePipeline],
         init_params: Dict[str, Any],
-        param_grid: Dict[str, List[Any]],
+        param_grids: List[Dict[str, List[Any]]],
         calibration_dataset: Dataset,
         validation_fn: Callable[[Any, Iterable[Any]], Tuple[float, Union[None, List[float], List[List[TTensor]]]]],
         subset_size: int,
@@ -224,7 +229,7 @@ def __init__(
         quantized_metric_results: MetricResults,
     ):
         """
-        :param algorithm_cls: Class of algorithm.
+        :param pipeline_cls: Class of pipeline.
         :param init_params: Initial set of parameters used to create algorithm.
-        :param param_grid: Dictionary with parameters names as keys and list of
-            parameter settings to try as values.
+        :param param_grids: List of dictionaries, one per pipeline step, with parameter
+            names as keys and lists of parameter settings to try as values.
@@ -235,9 +240,9 @@ def __init__(
         :param initial_metric_results: Metric results for initial model.
         :param quantized_metric_results: Metric results for quantized with `init_params` model.
         """
-        self._algorithm_cls = algorithm_cls
+        self._pipeline_cls = pipeline_cls
         self._init_params = init_params
-        self._param_grid = param_grid
+        self._param_grids = param_grids
         self._calibration_dataset = calibration_dataset
         self._evaluator = Evaluator(validation_fn)
         self._subset_size = subset_size
@@ -246,22 +251,22 @@ def __init__(
 
         self._is_metric_mode = isinstance(self._initial_metric_results.values_for_each_item[0], float)
 
-        # # Will be initialized inside `apply()` method
+        # Will be initialized inside `run()` method
         self._error_fn = None
 
-        # Will be initialized inside `_initialize_algorithms()` method
-        self._algorithms: Dict[CombinationKey, Algorithm] = {}
-        self._statistic_points = None
+        # Will be initialized inside `_prepare_pipeline_step()` method
+        self._pipelines: Dict[CombinationKey, StepwisePipeline] = {}
+        self._step_index_to_statistics: Dict[int, StatisticPointsContainer] = {}
 
         self._calculated_scores: Dict[CombinationKey, float] = {}
 
-    def apply(self, model: TModel, validation_dataset: Dataset) -> TModel:
+    def run(self, model: TModel, dataset: Dataset) -> TModel:
         """
-        Applies algorithm to input model.
+        Runs the hyperparameter tuning pipeline on the provided model.
 
-        :param model: Input model.
-        :param validation_dataset: Dataset used to validate resulted model.
-        :return: Resulted model.
+        :param model: A model to which the pipeline will be applied.
+        :param dataset: A dataset used to validate the resulting model.
+        :return: The model tuned with the best found combination of parameters.
         """
         if self._is_metric_mode:
             self._error_fn = operator.sub
@@ -275,58 +280,93 @@ def run(self, model: TModel, dataset: Dataset) -> TModel:
             self._error_fn,
         )
 
-        combinations = create_combinations(self._param_grid)
+        step_model = model  # The model on which the `step_index`-th pipeline step will be executed
+        best_settings = {}
+
+        for step_index, step_param_grid in enumerate(self._param_grids):
+            step_graph = NNCFGraphFactory.create(step_model)
+
+            # If there are no parameters to optimize for the current step, simply execute
+            # this pipeline step on the model.
+            if step_param_grid is None:
+                # TODO(andrey-churkin): Think about how it can be avoided.
+                params = apply_combination(self._init_params, best_settings)
+                pipeline_step = self._pipeline_cls(**params).pipeline_steps[step_index]
+                container = get_statistic_points(pipeline_step, step_model, step_graph)
+                step_statistics = collect_statistics(container, step_model, step_graph, self._calibration_dataset)
+                step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph)
+                continue
 
-        initial_graph = NNCFGraphFactory.create(model)
+            step_combinations = create_combinations(step_param_grid)
 
-        nncf_logger.info("Start initialization of algorithms")
-        with timer():
-            self._prepare_algorithms(model, initial_graph, combinations)
+            nncf_logger.info(f"Start preparation for the {step_index}-th pipeline step")
+            with timer():
+                self._prepare_pipeline_step(step_index, step_model, step_graph, step_combinations, best_settings)
 
-        combination_score_fn = functools.partial(
-            self._calculate_combination_score,
-            initial_model=model,
-            initial_graph=initial_graph,
-            dataset=validation_dataset,
-            subset_indices=subset_indices,
-        )
+            combination_score_fn = functools.partial(
+                self._calculate_combination_score,
+                step_index=step_index,
+                step_model=step_model,
+                step_graph=step_graph,
+                dataset=dataset,
+                subset_indices=subset_indices,
+            )
+
+            nncf_logger.info("Start searching for the best combination of parameters")
+            with timer():
+                step_best_combination_key = find_best_combination(
+                    step_combinations, combination_score_fn, step_param_grid
+                )
 
-        nncf_logger.info("Start search best combination of parameters")
-        with timer():
-            best_combination_key = find_best_combination(combinations, combination_score_fn, self._param_grid)
+            best_settings.update(step_combinations[step_best_combination_key])
+            pipeline_step = self._pipelines[step_best_combination_key].pipeline_steps[step_index]
+            step_model = run_pipeline_step(
+                pipeline_step, self._step_index_to_statistics[step_index], step_model, step_graph
+            )
 
-        algorithm = self._algorithms[best_combination_key]
-        result_model = algorithm.apply(model, initial_graph, self._statistic_points)
+        # TODO(andrey-churkin): Show final best settings
 
-        return result_model
+        return step_model
 
-    def _prepare_algorithms(
-        self, initial_model: TModel, initial_graph: NNCFGraph, combinations: Dict[CombinationKey, Combination]
+    def _prepare_pipeline_step(
+        self,
+        step_index: int,
+        step_model: TModel,
+        step_graph: NNCFGraph,
+        step_combinations: Dict[CombinationKey, Combination],
    ) -> None:
         """
-        Creates algorithm for each combination of parameters. Collects statistics for
-        created algorithms.
+        Creates a pipeline for each combination of parameters and collects the statistics
+        required to execute the `step_index`-th pipeline step.
 
-        :param initial_model: Input model used to collect statistics for algorithms.
-        :param combinations: Combinations of parameters.
+        :param step_index: Zero-based index of the pipeline step to prepare.
+        :param step_model: A model on which the `step_index`-th pipeline step will be executed.
+        :param step_graph: A graph associated with the model.
+        :param step_combinations: Combinations of parameters to try for the pipeline step.
         """
-        for combination_key, combination in combinations.items():
-            kwargs = apply_combination(self._init_params, combination)
-            self._algorithms[combination_key] = self._algorithm_cls(**kwargs)
+        # Create a separate pipeline for each combination
 
-        # Collect required statistics for created algorithms
-        stats_aggregator = StatisticsAggregatorFactory.create(initial_model, self._calibration_dataset)
-        for algorithm in self._algorithms.values():
-            statistic_points = algorithm.get_statistic_points(initial_model, initial_graph)
-            stats_aggregator.register_statistic_points(statistic_points)
-        stats_aggregator.collect_statistics(initial_model, initial_graph)
-        self._statistic_points = stats_aggregator.statistic_points
+        # TODO(andrey-churkin): Think about how it can be avoided. In an ideal scenario,
+        # we would have only one pipeline and set parameters directly within it.
+        self._pipelines = {}
+        for combination_key, combination in step_combinations.items():
+            kwargs = apply_combination(self._init_params, combination)
+            self._pipelines[combination_key] = self._pipeline_cls(**kwargs)
+
+        # Collect statistics required to execute the `step_index`-th pipeline step
+        containers = [
+            get_statistic_points(pipeline.pipeline_steps[step_index], step_model, step_graph)
+            for pipeline in self._pipelines.values()
+        ]
+        self._step_index_to_statistics[step_index] = collect_statistics(
+            containers, step_model, step_graph, self._calibration_dataset
+        )
 
     def _calculate_combination_score(
         self,
         combination_key: CombinationKey,
-        initial_model: TModel,
-        initial_graph: NNCFGraph,
+        step_index: int,
+        step_model: TModel,
+        step_graph: NNCFGraph,
         dataset: Dataset,
         subset_indices: List[int],
     ) -> float:
@@ -343,8 +383,15 @@ def _calculate_combination_score(
         if combination_key in self._calculated_scores:
             return self._calculated_scores[combination_key]
 
-        algorithm = self._algorithms[combination_key]
-        model = algorithm.apply(initial_model, initial_graph, self._statistic_points)
+        model = run_pipeline_from_step(
+            self._pipelines[combination_key],
+            step_model,
+            self._calibration_dataset,
+            step_graph,
+            step_index,
+            self._step_index_to_statistics,
+        )
+
+        score = self._validate_model(model, dataset, subset_indices)
         self._calculated_scores[combination_key] = score
diff --git a/nncf/quantization/pipelines/stepwise_pipeline.py b/nncf/quantization/pipelines/stepwise_pipeline.py
index c83de92f903..f659665210e 100644
--- a/nncf/quantization/pipelines/stepwise_pipeline.py
+++ b/nncf/quantization/pipelines/stepwise_pipeline.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict, List, Optional, TypeVar
+from typing import Dict, List, Optional, TypeVar, Union
 
 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.factory import StatisticsAggregatorFactory
@@ -42,7 +42,10 @@ def get_statistic_points(pipeline_step: PipelineStep, model: TModel, graph: NNCF
 
 
 def collect_statistics(
-    statistic_points: StatisticPointsContainer, model: TModel, graph: NNCFGraph, dataset: Dataset
+    containers: Union[StatisticPointsContainer, List[StatisticPointsContainer]],
+    model: TModel,
+    graph: NNCFGraph,
+    dataset: Dataset,
 ) -> StatisticPointsContainer:
     """
     Collects statistics for the provided statistic points.
@@ -53,8 +56,12 @@ def collect_statistics(
-    :param statistic_points: Statistic points for which the statistics are collected.
+    :param containers: A container or a list of containers with statistic points.
     :param model: A model for which the statistics are collected.
     :param graph: A graph associated with the model.
     :param dataset: A dataset that holds the data items to compute the statistics.
     :return: A container with the collected statistic points.
     """
+    if not isinstance(containers, list):
+        containers = [containers]
+
     statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
-    statistics_aggregator.register_statistic_points(statistic_points)
+    for container in containers:
+        statistics_aggregator.register_statistic_points(container)
     statistics_aggregator.collect_statistics(model, graph)
 
     return statistics_aggregator.statistic_points
diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py
index c52dedd7021..de00bf2ead2 100644
--- a/nncf/quantization/quantize_model.py
+++ b/nncf/quantization/quantize_model.py
@@ -24,7 +24,7 @@
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
 from nncf.quantization.algorithms.accuracy_control.evaluator import MetricResults
 from nncf.quantization.algorithms.hyperparameter_tuner.algorithm import HyperparameterTuner
-from nncf.quantization.algorithms.hyperparameter_tuner.param_grid import get_quantization_param_grid
+from nncf.quantization.pipelines.hyperparameter_tuner.param_grid import get_quantization_param_grid
 from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
 from nncf.scopes import IgnoredScope
 

From dc7b61dd1c4efc63d6371f6ce7ff32917f733ee0 Mon Sep 17 00:00:00 2001
From: Andrey Churkin
Date: Mon, 18 Sep 2023 11:50:48 +0100
Subject: [PATCH 3/7] Add fixes

---
 nncf/openvino/quantization/quantize_model.py |  5 +-
 .../hyperparameter_tuner/param_grid.py       | 51 ++++++++++++++++---
 .../hyperparameter_tuner/pipeline.py         |  8 ++-
 .../pipelines/stepwise_pipeline.py           |  2 +-
 nncf/quantization/quantize_model.py          | 12 ++---
 5 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py
index cefa084cd7d..22abeff4919 100644
--- a/nncf/openvino/quantization/quantize_model.py
+++ b/nncf/openvino/quantization/quantize_model.py
@@ -32,7 +32,7 @@
 from nncf.quantization.algorithms.accuracy_control.algorithm import QuantizationAccuracyRestorer
 from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop
 from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator
-from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
+from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization
 from nncf.quantization.quantize_model import quantize_with_tune_hyperparams
 from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi
 from nncf.scopes import IgnoredScope
@@ -116,8 +116,7 @@ def native_quantize_impl(
         advanced_parameters=advanced_parameters,
     )
 
-    graph = GraphConverter.create_nncf_graph(model)
-    quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset)
+    quantized_model = quantization_algorithm.run(model, calibration_dataset)
 
     if is_weight_compression_needed(advanced_parameters):
         compress_quantize_weights_transformation(quantized_model)
diff --git a/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py
index fe0baa1b833..28019394bec 100644
--- a/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py
+++ b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py
@@ -10,19 +10,24 @@
 # limitations under the License.
 
 import itertools
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 from nncf.common.quantization.structs import QuantizationPreset
+from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection
+from nncf.quantization.algorithms.channel_alignment.algorithm import ChannelAlignment
+from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection
+from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
+from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant
+from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization
 from nncf.quantization.range_estimator import AggregatorType
 from nncf.quantization.range_estimator import RangeEstimatorParameters
 from nncf.quantization.range_estimator import StatisticsCollectorParameters
 from nncf.quantization.range_estimator import StatisticsType
 
+ParamGrid = Dict[str, List[Any]]
 
-def get_quantization_param_grid() -> Dict[str, Any]:
-    """
-    Returns params grid for post-training quantization algorithm.
-    """
+
+def _get_minmax_quantization_param_grid() -> ParamGrid:
     min_param_values = [
         StatisticsCollectorParameters(
             statistics_type=StatisticsType.MIN,
@@ -58,7 +63,6 @@ def get_quantization_param_grid() -> Dict[str, Any]:
 
     param_grid = {
         "preset": [QuantizationPreset.PERFORMANCE, QuantizationPreset.MIXED],
-        "fast_bias_correction": [True, False],
         "advanced_parameters:weights_range_estimator_params": [
             RangeEstimatorParameters(
                 min=StatisticsCollectorParameters(statistics_type=StatisticsType.MIN),
@@ -70,5 +74,38 @@ def get_quantization_param_grid() -> Dict[str, Any]:
             for min_v, max_v in itertools.product(min_param_values, max_param_values)
         ],
     }
-    return param_grid
+
+    return param_grid
+
+
+def _get_smooth_quant_param_grid() -> ParamGrid:
+    return {"advanced_parameters:smooth_quant_alpha": [0.2, 0.5, 0.9, 0.95]}
+
+
+def _get_channel_alignment_param_grid() -> ParamGrid:
+    return {}
+
+
+def _get_bias_correction_param_grid() -> ParamGrid:
+    return {"fast_bias_correction": [True, False]}
+
+
+def get_quantization_param_grids(pipeline: PostTrainingQuantization) -> List[ParamGrid]:
+    """
+    Returns a parameter grid for each step of the post-training quantization pipeline.
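+
+    Each dictionary in the returned list corresponds to one step of the provided pipeline.
+    Keys use the same `"param_name"` and `"advanced_parameters:field_name"` conventions
+    that `HyperparameterTuner` expects.
+
+    :param pipeline: A post-training quantization pipeline to build parameter grids for.
+    :return: A list of parameter grids, one per pipeline step.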
+ """ + algorithm_cls_to_param_grid = { + SmoothQuant: _get_smooth_quant_param_grid(), + ChannelAlignment: _get_channel_alignment_param_grid(), + MinMaxQuantization: _get_minmax_quantization_param_grid(), + FastBiasCorrection: _get_bias_correction_param_grid(), + BiasCorrection: _get_bias_correction_param_grid(), + } + + param_grids = [] + for step in pipeline.pipeline_steps: + param_grid = {} + for algorithm in step: + param_grid.update(algorithm_cls_to_param_grid[algorithm.__class__]) + param_grids.append(param_grid) + + return param_grids diff --git a/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py index 781f206b9a7..85407b8d38e 100644 --- a/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py +++ b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py @@ -288,7 +288,7 @@ def run(self, model: TModel, dataset: Dataset) -> TModel: # If there are no parameters to optimize for the current step, simply execute # this pipeline step on the model. - if step_param_grid is None: + if not step_param_grid: # TODO(andrey-churkin): Think about how it can be avoided. params = apply_combination(self._init_params, best_settings) pipeline_step = self._pipeline_cls(**params).pipeline_steps[step_index] @@ -334,6 +334,7 @@ def _prepare_pipeline_step( step_model: TModel, step_graph: NNCFGraph, step_combinations: Dict[CombinationKey, Combination], + best_settings, ) -> None: """ TODO: @@ -349,7 +350,10 @@ def _prepare_pipeline_step( # we would have only one pipeline and set parameters directly within it. self._pipelines = {} for combination_key, combination in step_combinations.items(): - kwargs = apply_combination(self._init_params, combination) + settings = {} + settings.update(combination) + settings.update(best_settings) + kwargs = apply_combination(self._init_params, settings) self._pipelines[combination_key] = self._pipeline_cls(**kwargs) # Collect statistics required to execute `step_index`-th pipeline step diff --git a/nncf/quantization/pipelines/stepwise_pipeline.py b/nncf/quantization/pipelines/stepwise_pipeline.py index f659665210e..e38db709b53 100644 --- a/nncf/quantization/pipelines/stepwise_pipeline.py +++ b/nncf/quantization/pipelines/stepwise_pipeline.py @@ -135,7 +135,7 @@ def run_pipeline_from_step( pipeline: StepwisePipeline, model: TModel, dataset: Dataset, - graph: Optional[NNCFGraph], + graph: Optional[NNCFGraph] = None, start_step_index: int = 0, step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None, ) -> TModel: diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index de00bf2ead2..6e4acbb4ac1 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -23,9 +23,9 @@ from nncf.quantization.advanced_parameters import AdvancedAccuracyRestorerParameters from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.accuracy_control.evaluator import MetricResults -from nncf.quantization.algorithms.hyperparameter_tuner.algorithm import HyperparameterTuner -from nncf.quantization.pipelines.hyperparameter_tuner.param_grid import get_quantization_param_grid -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import HyperparameterTuner +from nncf.quantization.pipelines.hyperparameter_tuner.param_grid import get_quantization_param_grids +from 
nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.scopes import IgnoredScope TTensor = TypeVar("TTensor") @@ -299,12 +299,12 @@ def quantize_with_tune_hyperparams( "advanced_parameters": advanced_quantization_parameters, } - quantization_param_grid = get_quantization_param_grid() + param_grids = get_quantization_param_grids(PostTrainingQuantization(**init_quantization_params)) hyperparameter_tuner = HyperparameterTuner( PostTrainingQuantization, init_quantization_params, - quantization_param_grid, + param_grids, calibration_dataset, validation_fn, tuner_subset_size, @@ -312,6 +312,6 @@ def quantize_with_tune_hyperparams( quantized_metric_results, ) - quantized_model = hyperparameter_tuner.apply(model, validation_dataset) + quantized_model = hyperparameter_tuner.run(model, validation_dataset) return quantized_model From 69732b9560da1a07a3f4c2638cce663f8ded9d14 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Mon, 18 Sep 2023 14:06:10 +0100 Subject: [PATCH 4/7] Fix tests --- .../torch/quantization/quantize_model.py | 8 +- nncf/onnx/quantization/quantize_model.py | 8 +- nncf/openvino/quantization/quantize_model.py | 1 - .../algorithms/channel_alignment/algorithm.py | 17 +++- .../algorithms/smooth_quant/algorithm.py | 10 +++ .../pipelines/post_training/pipeline.py | 83 ++++++++++++------- nncf/quantization/quantize_model.py | 2 +- .../test_hyperparameter_tuner.py | 6 +- tests/onnx/quantization/common.py | 8 +- tests/onnx/quantization/test_ptq_params.py | 6 +- .../native/quantization/test_ptq_params.py | 6 +- .../test_templates/test_bias_correction.py | 12 ++- .../test_fast_bias_correction.py | 8 +- .../test_templates/test_ptq_params.py | 18 ++-- .../test_templates/test_quantizer_config.py | 18 ++-- .../test_templates/test_smooth_quant.py | 7 +- tests/torch/ptq/test_fq_params_calculation.py | 19 ++--- tests/torch/ptq/test_graphs.py | 7 +- tests/torch/ptq/test_ptq_params.py | 6 +- 19 files changed, 141 insertions(+), 109 deletions(-) diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py index c97ab9c9675..44a5c0053ae 100644 --- a/nncf/experimental/torch/quantization/quantize_model.py +++ b/nncf/experimental/torch/quantization/quantize_model.py @@ -18,7 +18,7 @@ from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.scopes import IgnoredScope from nncf.torch.dynamic_graph.context import no_nncf_trace from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors @@ -105,7 +105,7 @@ def quantize_impl( nncf_network = create_nncf_network(model.eval(), calibration_dataset) - quantization_algorithm = PostTrainingQuantization( + quantization_pipeline = PostTrainingQuantization( preset=preset, target_device=target_device, subset_size=subset_size, @@ -115,9 +115,7 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) - quantized_model = quantization_algorithm.apply( - nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset - ) + quantized_model = quantization_pipeline.run(nncf_network, calibration_dataset) quantized_model.nncf.disable_dynamic_graph_building() diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index 
a88a9213f36..4d3cf404749 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -16,11 +16,10 @@ from nncf.common.logging.logger import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset -from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope from nncf.telemetry import tracked_function @@ -56,7 +55,7 @@ def quantize_impl( advanced_parameters.weights_quantization_params.per_channel = False advanced_parameters.activations_quantization_params.per_channel = False - quantization_algorithm = PostTrainingQuantization( + quantization_pipeline = PostTrainingQuantization( preset=preset, target_device=target_device, subset_size=subset_size, @@ -66,7 +65,6 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) - graph = GraphConverter.create_nncf_graph(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) + quantized_model = quantization_pipeline.run(model, calibration_dataset) return quantized_model diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 22abeff4919..2dc64a10dfa 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -19,7 +19,6 @@ from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset -from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.quantization.backend_parameters import BackendParameters from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed from nncf.openvino.quantization.weights_compression import insert_pre_compression_operations diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py index c5cf65dfefd..458e6146e9c 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -22,6 +22,7 @@ from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.logging import nncf_logger from nncf.common.logging.track_progress import track from nncf.common.tensor_statistics.statistic_point import StatisticPoint from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer @@ -97,6 +98,12 @@ def apply( statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: + if model is not None: + backend = get_backend(model) + if backend != BackendType.OPENVINO: + nncf_logger.debug(f"{backend.name} does not support ChannelAlignment algorithm yet.") + return model + self._set_backend_entity(model) model_transformer = ModelTransformerFactory.create(model) transformation_layout = TransformationLayout() @@ -368,9 +375,15 @@ def 
_get_target_point_and_node_in(self, conv_in, add_in) -> Tuple[TargetPoint, N ) def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: - self._set_backend_entity(model) - statistic_container = StatisticPointsContainer() + + if model is not None: + backend = get_backend(model) + if backend != BackendType.OPENVINO: + nncf_logger.debug(f"{backend.name} does not support ChannelAlignment algorithm yet.") + return statistic_container + + self._set_backend_entity(model) for conv_in, add_in, _ in self._get_node_pairs(graph): target_point, node_in = self._get_target_point_and_node_in(conv_in, add_in) channel_axis = conv_in.metatype.output_channel_axis diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index a9ccdef9e10..3a1478a5b2c 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -101,6 +101,11 @@ def apply( statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: + backend = get_backend(model) + if backend != BackendType.OPENVINO: + nncf_logger.debug(f"{backend.name} does not support SmoothQuant algorithm yet.") + return model + self._set_backend_entity(model) nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph) @@ -221,6 +226,11 @@ def filter_func(point: StatisticPoint) -> bool: def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: statistic_container = StatisticPointsContainer() + backend = get_backend(model) + if backend != BackendType.OPENVINO: + nncf_logger.debug(f"{backend.name} does not support SmoothQuant algorithm yet.") + return statistic_container + self._set_backend_entity(model) nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph) diff --git a/nncf/quantization/pipelines/post_training/pipeline.py b/nncf/quantization/pipelines/post_training/pipeline.py index 01094ddf848..24105dbdb7e 100644 --- a/nncf/quantization/pipelines/post_training/pipeline.py +++ b/nncf/quantization/pipelines/post_training/pipeline.py @@ -30,7 +30,14 @@ class PostTrainingQuantization(StepwisePipeline): - """ """ + """ + A class for creating a post-training quantization pipeline. + The post-training quantization pipeline includes the following steps: + 1) SmoothQuant + 2) ChannelAlignment + 3) MinMaxQuantization + 4) FastBiasCorrection or BiasCorrection + """ def __init__( self, @@ -42,7 +49,27 @@ def __init__( ignored_scope: Optional[IgnoredScope] = None, advanced_parameters: Optional[AdvancedQuantizationParameters] = None, ): - """ """ + """ + :param preset: A preset that controls the quantization mode + (symmetric and asymmetric). It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric + quantization of activations. + :param target_device: A target device the specificity of which will be taken + into account while compressing in order to obtain the best performance + for this type of device. + :param subset_size: Size of a subset to calculate activations + statistics used for quantization. + :param fast_bias_correction: Setting this option to `False` enables a different + bias correction method which is more accurate, in general, and takes + more time but requires less memory. + :param model_type: Model type is needed to specify additional patterns + in the model. Supported only `transformer` now. 
+ :param ignored_scope: An ignored scope that defined the list of model control + flow graph nodes to be ignored during quantization. + :param advanced_parameters: Advanced quantization parameters for + fine-tuning the quantization algorithm + """ if target_device is TargetDevice.VPU: warning_deprecated("VPU device is deprecated and will no longer be supported in the future.") @@ -88,33 +115,31 @@ def __init__( ] ) - if advanced_parameters.disable_bias_correction: - return - - # Add the `FastBiasCorrection` or `BiasCorrection` as additional algorithm - # inside the third step of the pipeline. It is added after `MinMaxQuantization` - # algorithm. - bias_correction_params = advanced_parameters.bias_correction_params - if fast_bias_correction: - threshold = FAST_BIAS_CORRECTION_THRESHOLD - bias_correction_subset_size = subset_size - bias_correction_cls = FastBiasCorrection - else: - threshold = BIAS_CORRECTION_THRESHOLD - bias_correction_subset_size = max(int(subset_size * 0.2), 1) - bias_correction_cls = BiasCorrection - - if bias_correction_params.threshold is not None: - threshold = bias_correction_params.threshold - - pipeline_steps[-1].append( - bias_correction_cls( - bias_correction_subset_size, - threshold, - bias_correction_params.apply_for_all_nodes, - advanced_parameters.inplace_statistics, - advanced_parameters.backend_params, + if not advanced_parameters.disable_bias_correction: + # Add the `FastBiasCorrection` or `BiasCorrection` as additional algorithm + # inside the third step of the pipeline. It is added after `MinMaxQuantization` + # algorithm. + bias_correction_params = advanced_parameters.bias_correction_params + if fast_bias_correction: + threshold = FAST_BIAS_CORRECTION_THRESHOLD + bias_correction_subset_size = subset_size + bias_correction_cls = FastBiasCorrection + else: + threshold = BIAS_CORRECTION_THRESHOLD + bias_correction_subset_size = max(int(subset_size * 0.2), 1) + bias_correction_cls = BiasCorrection + + if bias_correction_params.threshold is not None: + threshold = bias_correction_params.threshold + + pipeline_steps[-1].append( + bias_correction_cls( + bias_correction_subset_size, + threshold, + bias_correction_params.apply_for_all_nodes, + advanced_parameters.inplace_statistics, + advanced_parameters.backend_params, + ) ) - ) super().__init__(pipeline_steps) diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 6e4acbb4ac1..2f5aa05cdc8 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -23,8 +23,8 @@ from nncf.quantization.advanced_parameters import AdvancedAccuracyRestorerParameters from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.algorithms.accuracy_control.evaluator import MetricResults -from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import HyperparameterTuner from nncf.quantization.pipelines.hyperparameter_tuner.param_grid import get_quantization_param_grids +from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import HyperparameterTuner from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.scopes import IgnoredScope diff --git a/tests/common/hyperparameter_tuner/test_hyperparameter_tuner.py b/tests/common/hyperparameter_tuner/test_hyperparameter_tuner.py index f057cfa18e8..c5c4d022f9c 100644 --- a/tests/common/hyperparameter_tuner/test_hyperparameter_tuner.py +++ b/tests/common/hyperparameter_tuner/test_hyperparameter_tuner.py @@ -16,9 +16,9 @@ 
import pytest -from nncf.quantization.algorithms.hyperparameter_tuner.algorithm import apply_combination -from nncf.quantization.algorithms.hyperparameter_tuner.algorithm import create_combinations -from nncf.quantization.algorithms.hyperparameter_tuner.algorithm import find_best_combination +from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import apply_combination +from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import create_combinations +from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import find_best_combination CombinationKey = Tuple[int, ...] Combination = Dict[str, Any] diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index 1d3464882fe..6e0bfd76461 100644 --- a/tests/onnx/quantization/common.py +++ b/tests/onnx/quantization/common.py @@ -20,8 +20,8 @@ from nncf.onnx.graph.onnx_graph import ONNXGraph from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.fake_quantize import FakeQuantizeParameters +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from tests.onnx.common import get_random_generator from tests.onnx.opset_converter import convert_opset_version from tests.shared.nx_graph import check_nx_graph @@ -95,7 +95,6 @@ def min_max_quantize_model( ) -> onnx.ModelProto: if convert_model_opset: original_model = convert_opset_version(original_model) - graph = GraphConverter.create_nncf_graph(original_model) dataset = get_random_dataset_for_test(original_model, dataset_has_batch_size) quantization_params = {} if quantization_params is None else quantization_params @@ -105,7 +104,7 @@ def min_max_quantize_model( post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) - quantized_model = post_training_quantization.apply(original_model, graph, dataset=dataset) + quantized_model = post_training_quantization.run(original_model, dataset) return quantized_model @@ -117,11 +116,10 @@ def ptq_quantize_model( ) -> onnx.ModelProto: if convert_model_opset: original_model = convert_opset_version(original_model) - graph = GraphConverter.create_nncf_graph(original_model) dataset = get_random_dataset_for_test(original_model, dataset_has_batch_size) quantization_params = {} if quantization_params is None else quantization_params post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) - quantized_model = post_training_quantization.apply(original_model, graph, dataset=dataset) + quantized_model = post_training_quantization.run(original_model, dataset) return quantized_model diff --git a/tests/onnx/quantization/test_ptq_params.py b/tests/onnx/quantization/test_ptq_params.py index 9bc23b1410b..b53e9dc1448 100644 --- a/tests/onnx/quantization/test_ptq_params.py +++ b/tests/onnx/quantization/test_ptq_params.py @@ -25,7 +25,7 @@ from nncf.onnx.statistics.collectors import ONNXMinMaxStatisticCollector from nncf.parameters import TargetDevice from nncf.quantization.algorithms.min_max.onnx_backend import ONNXMinMaxAlgoBackend -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype from 
tests.common.quantization.metatypes import LinearTestMetatype @@ -49,8 +49,8 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter @pytest.mark.parametrize("target_device", TargetDevice) def test_target_device(target_device): - algo = PostTrainingQuantization(target_device=target_device) - min_max_algo = algo.algorithms[0] + pipeline = PostTrainingQuantization(target_device=target_device) + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = ONNXMinMaxAlgoBackend() assert min_max_algo._target_device == target_device diff --git a/tests/openvino/native/quantization/test_ptq_params.py b/tests/openvino/native/quantization/test_ptq_params.py index 3552915d523..436f16ed919 100644 --- a/tests/openvino/native/quantization/test_ptq_params.py +++ b/tests/openvino/native/quantization/test_ptq_params.py @@ -27,7 +27,7 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.parameters import TargetDevice from nncf.quantization.algorithms.min_max.openvino_backend import OVMinMaxAlgoBackend -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype from tests.common.quantization.metatypes import LinearTestMetatype @@ -49,8 +49,8 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter # pylint: disable=protected-access @pytest.mark.parametrize("target_device", [TargetDevice.CPU, TargetDevice.GPU, TargetDevice.VPU]) def test_target_device(target_device): - algo = PostTrainingQuantization(target_device=target_device) - min_max_algo = algo.algorithms[0] + pipelines = PostTrainingQuantization(target_device=target_device) + min_max_algo = pipelines.pipeline_steps[-1][0] min_max_algo._backend_entity = OVMinMaxAlgoBackend() assert min_max_algo._target_device.value == HW_CONFIG_TYPE_TARGET_DEVICE_MAP[target_device.value] diff --git a/tests/post_training/test_templates/test_bias_correction.py b/tests/post_training/test_templates/test_bias_correction.py index 68c72301707..8e9d64e7e47 100644 --- a/tests/post_training/test_templates/test_bias_correction.py +++ b/tests/post_training/test_templates/test_bias_correction.py @@ -20,7 +20,7 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection from nncf.quantization.algorithms.bias_correction.backend import BiasCorrectionAlgoBackend -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from tests.post_training.test_templates.helpers import ConvTestModel from tests.post_training.test_templates.helpers import MultipleConvTestModel from tests.post_training.test_templates.helpers import SplittedModel @@ -133,9 +133,8 @@ def quantized_test_model(self, tmpdir) -> TModel: model = self.backend_specific_model(model_cls(), tmpdir) dataset = Dataset(self.get_dataset(model_cls.INPUT_SIZE), self.get_transform_fn()) - quantization_algorithm = self.get_quantization_algorithm(disable_bias_correction=True) - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantization_pipeline = self.get_quantization_algorithm(disable_bias_correction=True) + quantized_model = 
quantization_pipeline.run(model, dataset) modified_model = self.remove_fq_from_inputs(quantized_model) return modified_model @@ -160,9 +159,8 @@ def test_update_bias(self, model_cls, ref_biases, tmpdir): model = self.backend_specific_model(model_cls(), tmpdir) dataset = Dataset(self.get_dataset(model_cls.INPUT_SIZE), self.get_transform_fn()) - quantization_algorithm = self.get_quantization_algorithm() - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantization_pipeline = self.get_quantization_algorithm() + quantized_model = quantization_pipeline.run(model, dataset) mapped_ref_biases = self.map_references(ref_biases) self.check_bias(quantized_model, mapped_ref_biases) diff --git a/tests/post_training/test_templates/test_fast_bias_correction.py b/tests/post_training/test_templates/test_fast_bias_correction.py index b972ce851cd..77163a018ec 100644 --- a/tests/post_training/test_templates/test_fast_bias_correction.py +++ b/tests/post_training/test_templates/test_fast_bias_correction.py @@ -14,12 +14,11 @@ import pytest -from nncf.common.factory import NNCFGraphFactory from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from tests.post_training.test_templates.helpers import ConvBNTestModel from tests.post_training.test_templates.helpers import ConvTestModel from tests.post_training.test_templates.helpers import get_static_dataset @@ -114,8 +113,7 @@ def test_update_bias(self, model_cls, ref_bias, tmpdir): model = self.backend_specific_model(model_cls(), tmpdir) dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) - quantization_algorithm = self.get_quantization_algorithm() - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantization_pipeline = self.get_quantization_algorithm() + quantized_model = quantization_pipeline.run(model, dataset) self.check_bias(quantized_model, ref_bias) diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index a2ec340c2ff..287bf886854 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -30,8 +30,8 @@ from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.passes import transform_to_inference_graph +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype @@ -131,12 +131,12 @@ def metatypes_mapping(self): "range_estimator_params", [RangeEstimatorParametersSet.MINMAX, 
RangeEstimatorParametersSet.MEAN_MINMAX, None] ) def test_range_estimator_per_tensor(self, test_params, range_estimator_params): - algo = PostTrainingQuantization( + pipeline = PostTrainingQuantization( advanced_parameters=AdvancedQuantizationParameters( activations_range_estimator_params=range_estimator_params ) ) - min_max_algo = algo.algorithms[0] + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == range_estimator_params @@ -161,10 +161,10 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): @pytest.mark.parametrize("quantize_outputs", [False, True]) def test_quantize_outputs(self, test_params, quantize_outputs): - algo = PostTrainingQuantization( + pipeline = PostTrainingQuantization( advanced_parameters=AdvancedQuantizationParameters(quantize_outputs=quantize_outputs) ) - min_max_algo = algo.algorithms[0] + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = test_params["test_quantize_outputs"]["nncf_graph"] @@ -189,8 +189,8 @@ def test_quantize_outputs(self, test_params, quantize_outputs): def test_ignored_scopes(self, test_params, ignored_scopes_data): ignored_scope, act_num_ref, weight_num_ref = ignored_scopes_data - algo = PostTrainingQuantization(ignored_scope=ignored_scope) - min_max_algo = algo.algorithms[0] + pipeline = PostTrainingQuantization(ignored_scope=ignored_scope) + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() assert min_max_algo._ignored_scope == ignored_scope @@ -215,8 +215,8 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): @pytest.mark.parametrize("model_type", [ModelType.TRANSFORMER]) def test_model_type_pass(self, test_params, model_type): - algo = PostTrainingQuantization(preset=QuantizationPreset.MIXED, model_type=model_type) - min_max_algo = algo.algorithms[0] + pipeline = PostTrainingQuantization(preset=QuantizationPreset.MIXED, model_type=model_type) + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = test_params["test_model_type_pass"]["nncf_graph"] diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index e614138d0a9..28f309dbf7e 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -31,8 +31,8 @@ from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import QuantizationParameters -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.passes import transform_to_inference_graph +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.quantization.range_estimator import RangeEstimatorParametersSet from tests.post_training.test_templates.models import NNCFGraphToTest from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv @@ -81,8 +81,8 @@ def statistic_collector_parameters(self, request) -> TestGetStatisticsCollectorP pass def test_default_quantizer_config(self, single_conv_nncf_graph): - algo = PostTrainingQuantization() - min_max_algo = algo.algorithms[0] + 
pipeline = PostTrainingQuantization() + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( @@ -127,7 +127,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( signed_activations, single_conv_nncf_graph, ): - algo = PostTrainingQuantization( + pipeline = PostTrainingQuantization( preset=preset, advanced_parameters=AdvancedQuantizationParameters( activations_quantization_params=QuantizationParameters( @@ -138,7 +138,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( ), ), ) - min_max_algo = algo.algorithms[0] + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( @@ -179,8 +179,8 @@ def test_quantizer_config_from_ptq_params_for_CPU( assert quantization_point.qconfig.signedness_to_force == signed_activations def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph): - algo = PostTrainingQuantization() - min_max_algo = algo.algorithms[0] + pipeline = PostTrainingQuantization() + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = depthwise_conv_nncf_graph.nncf_graph inference_nncf_graph = transform_to_inference_graph( @@ -223,12 +223,12 @@ def test_get_stat_collector( statistic_collector_parameters: TestGetStatisticsCollectorParameters, ): params = statistic_collector_parameters - algo = PostTrainingQuantization( + pipeline = PostTrainingQuantization( advanced_parameters=AdvancedQuantizationParameters( activations_range_estimator_params=range_estimator_params ) ) - min_max_algo = algo.algorithms[0] + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() q_config = QuantizerConfig(num_bits=8, mode=q_config_mode, per_channel=q_config_per_channel) diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py index 42fe17e01b0..5f56ec9e1b4 100644 --- a/tests/post_training/test_templates/test_smooth_quant.py +++ b/tests/post_training/test_templates/test_smooth_quant.py @@ -22,9 +22,9 @@ from nncf.parameters import ModelType from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from tests.post_training.test_templates.helpers import LinearMultiShapeModel from tests.post_training.test_templates.helpers import NonZeroLinearModel from tests.post_training.test_templates.helpers import get_static_dataset @@ -119,9 +119,8 @@ def test_smooth_quant_algo(self, model_cls, reference_values, tmpdir): model = self.backend_specific_model(model_cls(), tmpdir) dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) - quantization_algorithm = self.get_quantization_algorithm() - graph = NNCFGraphFactory.create(model) - quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) + quantization_pipeline = self.get_quantization_algorithm() + 
quantized_model = quantization_pipeline.run(model, dataset) self.check_scales(quantized_model, reference_values) diff --git a/tests/torch/ptq/test_fq_params_calculation.py b/tests/torch/ptq/test_fq_params_calculation.py index 974f42588d8..08f1869f2ce 100644 --- a/tests/torch/ptq/test_fq_params_calculation.py +++ b/tests/torch/ptq/test_fq_params_calculation.py @@ -18,8 +18,7 @@ import nncf from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.torch.model_creation import create_nncf_network from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.quantization.layers import QUANTIZATION_MODULES @@ -50,16 +49,9 @@ def transform_fn(sample): dataset = nncf.Dataset(dataloader, transform_func=transform_fn) post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) - # Using PTQ, but apply only MinMax - updated_algorithms = [] - for algo in post_training_quantization.algorithms: - if isinstance(algo, MinMaxQuantization): - updated_algorithms.append(algo) - post_training_quantization.algorithms = updated_algorithms - original_model.eval() nncf_network = create_nncf_network(original_model, config) - quantized_model = post_training_quantization.apply(nncf_network, nncf_network.nncf.get_graph(), dataset=dataset) + quantized_model = post_training_quantization.run(nncf_network, dataset) return quantized_model @@ -89,7 +81,12 @@ def get_fq_nodes_params(model: NNCFNetwork) -> Dict[str, np.ndarray]: def test_overflow_fix_scales(_seed, overflow_fix): model = TwoConvTestModel() quantized_model = min_max_quantize_model( - model, quantization_params={"advanced_parameters": AdvancedQuantizationParameters(overflow_fix=overflow_fix)} + model, + quantization_params={ + "advanced_parameters": AdvancedQuantizationParameters( + overflow_fix=overflow_fix, disable_bias_correction=True, disable_channel_alignment=True + ) + }, ) fq_nodes_params = get_fq_nodes_params(quantized_model) diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index b688e9cf830..c8dc9d2103b 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -16,7 +16,7 @@ from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.torch.layers import NNCF_RNN from nncf.torch.layers import LSTMCellNNCF from tests.post_training.test_templates.helpers import EmbeddingModel @@ -98,8 +98,7 @@ def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_p nncf_network = get_nncf_network(model, desc.input_sample_sizes) quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(disable_bias_correction=True) - quantization_algorithm = PostTrainingQuantization(**quantization_parameters) - - quantized_model = quantization_algorithm.apply(nncf_network, nncf_network.nncf.get_graph(), dataset=None) + quantization_pipeline = PostTrainingQuantization(**quantization_parameters) + quantized_model = quantization_pipeline.run(nncf_network, 
dataset=None) check_graph(quantized_model.nncf.get_graph(), desc.dot_filename, graph_dir) diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py index c174ec8b322..2bb2002eda8 100644 --- a/tests/torch/ptq/test_ptq_params.py +++ b/tests/torch/ptq/test_ptq_params.py @@ -25,7 +25,7 @@ from nncf.quantization.advanced_parameters import QuantizationMode from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend -from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from nncf.torch.graph.graph import PTTargetPoint @@ -94,8 +94,8 @@ def forward(self, x): @pytest.mark.parametrize("target_device", TargetDevice) def test_target_device(target_device): - algo = PostTrainingQuantization(target_device=target_device) - min_max_algo = algo.algorithms[0] + pipeline = PostTrainingQuantization(target_device=target_device) + min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = PTMinMaxAlgoBackend() assert min_max_algo._target_device == target_device From 1c8d51169d600b20bdebd2417162a09a60b12673 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Thu, 21 Sep 2023 09:45:22 +0100 Subject: [PATCH 5/7] Remove the PostTrainingQuantization class --- .../torch/quantization/quantize_model.py | 4 +- nncf/onnx/quantization/quantize_model.py | 4 +- nncf/openvino/quantization/quantize_model.py | 4 +- .../hyperparameter_tuner/param_grid.py | 4 +- .../pipelines/post_training/pipeline.py | 192 +++++++++--------- nncf/quantization/quantize_model.py | 6 +- tests/onnx/quantization/common.py | 6 +- tests/onnx/quantization/test_ptq_params.py | 4 +- .../native/quantization/test_ptq_params.py | 4 +- .../test_templates/test_bias_correction.py | 7 +- .../test_fast_bias_correction.py | 4 +- .../test_templates/test_ptq_params.py | 10 +- .../test_templates/test_quantizer_config.py | 10 +- .../test_templates/test_smooth_quant.py | 4 +- tests/torch/ptq/test_fq_params_calculation.py | 4 +- tests/torch/ptq/test_graphs.py | 4 +- tests/torch/ptq/test_ptq_params.py | 4 +- 17 files changed, 135 insertions(+), 140 deletions(-) diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py index 44a5c0053ae..e6a85fa3645 100644 --- a/nncf/experimental/torch/quantization/quantize_model.py +++ b/nncf/experimental/torch/quantization/quantize_model.py @@ -18,7 +18,7 @@ from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.scopes import IgnoredScope from nncf.torch.dynamic_graph.context import no_nncf_trace from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors @@ -105,7 +105,7 @@ def quantize_impl( nncf_network = create_nncf_network(model.eval(), calibration_dataset) - quantization_pipeline = PostTrainingQuantization( + quantization_pipeline = create_ptq_pipeline( preset=preset, target_device=target_device, subset_size=subset_size, diff --git a/nncf/onnx/quantization/quantize_model.py 
b/nncf/onnx/quantization/quantize_model.py index 4d3cf404749..8808489bc5f 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -19,7 +19,7 @@ from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope from nncf.telemetry import tracked_function @@ -55,7 +55,7 @@ def quantize_impl( advanced_parameters.weights_quantization_params.per_channel = False advanced_parameters.activations_quantization_params.per_channel = False - quantization_pipeline = PostTrainingQuantization( + quantization_pipeline = create_ptq_pipeline( preset=preset, target_device=target_device, subset_size=subset_size, diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 2dc64a10dfa..3befc193421 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -31,7 +31,7 @@ from nncf.quantization.algorithms.accuracy_control.algorithm import QuantizationAccuracyRestorer from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.quantization.quantize_model import quantize_with_tune_hyperparams from nncf.quantization.telemetry_extractors import CompressionStartedWithQuantizeApi from nncf.scopes import IgnoredScope @@ -105,7 +105,7 @@ def native_quantize_impl( """ Implementation of the `quantize()` method for the OpenVINO backend via the OpenVINO Runtime API. 
""" - quantization_algorithm = PostTrainingQuantization( + quantization_algorithm = create_ptq_pipeline( preset=preset, target_device=target_device, subset_size=subset_size, diff --git a/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py index 28019394bec..958f93f9b50 100644 --- a/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py +++ b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py @@ -18,7 +18,7 @@ from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline from nncf.quantization.range_estimator import AggregatorType from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.quantization.range_estimator import StatisticsCollectorParameters @@ -89,7 +89,7 @@ def _get_bias_correction_param_grid() -> ParamGrid: return {"fast_bias_correction": [True, False]} -def get_quantization_param_grids(pipeline: PostTrainingQuantization) -> List[ParamGrid]: +def get_quantization_param_grids(pipeline: StepwisePipeline) -> List[ParamGrid]: """ Returns params grid for post-training quantization algorithm. """ diff --git a/nncf/quantization/pipelines/post_training/pipeline.py b/nncf/quantization/pipelines/post_training/pipeline.py index 24105dbdb7e..16fc47f86f9 100644 --- a/nncf/quantization/pipelines/post_training/pipeline.py +++ b/nncf/quantization/pipelines/post_training/pipeline.py @@ -29,117 +29,111 @@ TModel = TypeVar("TModel") -class PostTrainingQuantization(StepwisePipeline): +def create_ptq_pipeline( + preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + advanced_parameters: Optional[AdvancedQuantizationParameters] = None, +) -> StepwisePipeline: """ - A class for creating a post-training quantization pipeline. + Creates a post-training quantization pipeline. + The post-training quantization pipeline includes the following steps: 1) SmoothQuant 2) ChannelAlignment 3) MinMaxQuantization 4) FastBiasCorrection or BiasCorrection + + :param preset: A preset that controls the quantization mode + (symmetric and asymmetric). It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric + quantization of activations. + :param target_device: A target device the specificity of which will be taken + into account while compressing in order to obtain the best performance + for this type of device. + :param subset_size: Size of a subset to calculate activations + statistics used for quantization. + :param fast_bias_correction: Setting this option to `False` enables a different + bias correction method which is more accurate, in general, and takes + more time but requires less memory. + :param model_type: Model type is needed to specify additional patterns + in the model. Supported only `transformer` now. 
+    :param ignored_scope: An ignored scope that defines the list of model control
+        flow graph nodes to be ignored during quantization.
+    :param advanced_parameters: Advanced quantization parameters for
+        fine-tuning the quantization algorithm.
+    :return: A post-training quantization pipeline.
     """
+    if target_device is TargetDevice.VPU:
+        warning_deprecated("VPU device is deprecated and will no longer be supported in the future.")
 
-    def __init__(
-        self,
-        preset: QuantizationPreset = QuantizationPreset.PERFORMANCE,
-        target_device: TargetDevice = TargetDevice.ANY,
-        subset_size: int = 300,
-        fast_bias_correction: bool = True,
-        model_type: Optional[ModelType] = None,
-        ignored_scope: Optional[IgnoredScope] = None,
-        advanced_parameters: Optional[AdvancedQuantizationParameters] = None,
-    ):
-        """
-        :param preset: A preset that controls the quantization mode
-            (symmetric and asymmetric). It can take the following values:
-            - `performance`: Symmetric quantization of weights and activations.
-            - `mixed`: Symmetric quantization of weights and asymmetric
-              quantization of activations.
-        :param target_device: A target device the specificity of which will be taken
-            into account while compressing in order to obtain the best performance
-            for this type of device.
-        :param subset_size: Size of a subset to calculate activations
-            statistics used for quantization.
-        :param fast_bias_correction: Setting this option to `False` enables a different
-            bias correction method which is more accurate, in general, and takes
-            more time but requires less memory.
-        :param model_type: Model type is needed to specify additional patterns
-            in the model. Supported only `transformer` now.
-        :param ignored_scope: An ignored scope that defined the list of model control
-            flow graph nodes to be ignored during quantization.
-        :param advanced_parameters: Advanced quantization parameters for
-            fine-tuning the quantization algorithm
-        """
-        if target_device is TargetDevice.VPU:
-            warning_deprecated("VPU device is deprecated and will no longer be supported in the future.")
-
-        if advanced_parameters is None:
-            advanced_parameters = AdvancedQuantizationParameters()
-
-        # Build the post-training quantization pipeline.
-        pipeline_steps = []
-
-        # Add the `SmoothQuant` algorithm as the first step of the pipeline.
-        # It is added only for `ModelType.TRANSFORMER`.
-        if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0:
-            pipeline_steps.append(
-                [
-                    SmoothQuant(
-                        subset_size, advanced_parameters.inplace_statistics, advanced_parameters.smooth_quant_alpha
-                    )
-                ]
-            )
+    if advanced_parameters is None:
+        advanced_parameters = AdvancedQuantizationParameters()
 
-        # Add the `ChannelAlignment` algorithm as the second step of the pipeline.
-        if not advanced_parameters.disable_channel_alignment:
-            pipeline_steps.append([ChannelAlignment(subset_size, advanced_parameters.inplace_statistics)])
+    # Build the post-training quantization pipeline.
+    pipeline_steps = []
 
-        # Add the `MinMaxQuantization` algorithm as the third step of the pipeline.
+    # Add the `SmoothQuant` algorithm as the first step of the pipeline.
+    # It is added only for `ModelType.TRANSFORMER`.
+ if model_type == ModelType.TRANSFORMER and advanced_parameters.smooth_quant_alpha >= 0: pipeline_steps.append( - [ - MinMaxQuantization( - preset, - target_device, - subset_size, - model_type, - ignored_scope, - advanced_parameters.overflow_fix, - advanced_parameters.quantize_outputs, - advanced_parameters.inplace_statistics, - advanced_parameters.activations_quantization_params, - advanced_parameters.weights_quantization_params, - advanced_parameters.activations_range_estimator_params, - advanced_parameters.weights_range_estimator_params, - advanced_parameters.backend_params, - ) - ] + [SmoothQuant(subset_size, advanced_parameters.inplace_statistics, advanced_parameters.smooth_quant_alpha)] ) - if not advanced_parameters.disable_bias_correction: - # Add the `FastBiasCorrection` or `BiasCorrection` as additional algorithm - # inside the third step of the pipeline. It is added after `MinMaxQuantization` - # algorithm. - bias_correction_params = advanced_parameters.bias_correction_params - if fast_bias_correction: - threshold = FAST_BIAS_CORRECTION_THRESHOLD - bias_correction_subset_size = subset_size - bias_correction_cls = FastBiasCorrection - else: - threshold = BIAS_CORRECTION_THRESHOLD - bias_correction_subset_size = max(int(subset_size * 0.2), 1) - bias_correction_cls = BiasCorrection - - if bias_correction_params.threshold is not None: - threshold = bias_correction_params.threshold - - pipeline_steps[-1].append( - bias_correction_cls( - bias_correction_subset_size, - threshold, - bias_correction_params.apply_for_all_nodes, - advanced_parameters.inplace_statistics, - advanced_parameters.backend_params, - ) + # Add the `ChannelAlignment` algorithm as the second step of the pipeline. + if not advanced_parameters.disable_channel_alignment: + pipeline_steps.append([ChannelAlignment(subset_size, advanced_parameters.inplace_statistics)]) + + # Add the `MinMaxQuantization` algorithm as the third step of the pipeline. + pipeline_steps.append( + [ + MinMaxQuantization( + preset, + target_device, + subset_size, + model_type, + ignored_scope, + advanced_parameters.overflow_fix, + advanced_parameters.quantize_outputs, + advanced_parameters.inplace_statistics, + advanced_parameters.activations_quantization_params, + advanced_parameters.weights_quantization_params, + advanced_parameters.activations_range_estimator_params, + advanced_parameters.weights_range_estimator_params, + advanced_parameters.backend_params, ) + ] + ) + + if not advanced_parameters.disable_bias_correction: + # Add the `FastBiasCorrection` or `BiasCorrection` as additional algorithm + # inside the third step of the pipeline. It is added after `MinMaxQuantization` + # algorithm. 
+ bias_correction_params = advanced_parameters.bias_correction_params + if fast_bias_correction: + threshold = FAST_BIAS_CORRECTION_THRESHOLD + bias_correction_subset_size = subset_size + bias_correction_cls = FastBiasCorrection + else: + threshold = BIAS_CORRECTION_THRESHOLD + bias_correction_subset_size = max(int(subset_size * 0.2), 1) + bias_correction_cls = BiasCorrection + + if bias_correction_params.threshold is not None: + threshold = bias_correction_params.threshold + + pipeline_steps[-1].append( + bias_correction_cls( + bias_correction_subset_size, + threshold, + bias_correction_params.apply_for_all_nodes, + advanced_parameters.inplace_statistics, + advanced_parameters.backend_params, + ) + ) - super().__init__(pipeline_steps) + return StepwisePipeline(pipeline_steps) diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 2f5aa05cdc8..989a7ce2d7b 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -25,7 +25,7 @@ from nncf.quantization.algorithms.accuracy_control.evaluator import MetricResults from nncf.quantization.pipelines.hyperparameter_tuner.param_grid import get_quantization_param_grids from nncf.quantization.pipelines.hyperparameter_tuner.pipeline import HyperparameterTuner -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.scopes import IgnoredScope TTensor = TypeVar("TTensor") @@ -299,10 +299,10 @@ def quantize_with_tune_hyperparams( "advanced_parameters": advanced_quantization_parameters, } - param_grids = get_quantization_param_grids(PostTrainingQuantization(**init_quantization_params)) + param_grids = get_quantization_param_grids(create_ptq_pipeline(**init_quantization_params)) hyperparameter_tuner = HyperparameterTuner( - PostTrainingQuantization, + create_ptq_pipeline, init_quantization_params, param_grids, calibration_dataset, diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index 6e0bfd76461..0e2bb4531e8 100644 --- a/tests/onnx/quantization/common.py +++ b/tests/onnx/quantization/common.py @@ -21,7 +21,7 @@ from nncf.onnx.statistics.statistics import ONNXMinMaxTensorStatistic from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.fake_quantize import FakeQuantizeParameters -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from tests.onnx.common import get_random_generator from tests.onnx.opset_converter import convert_opset_version from tests.shared.nx_graph import check_nx_graph @@ -102,7 +102,7 @@ def min_max_quantize_model( advanced_parameters.disable_bias_correction = True quantization_params["advanced_parameters"] = advanced_parameters - post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) + post_training_quantization = create_ptq_pipeline(subset_size=1, **quantization_params) quantized_model = post_training_quantization.run(original_model, dataset) return quantized_model @@ -118,7 +118,7 @@ def ptq_quantize_model( original_model = convert_opset_version(original_model) dataset = get_random_dataset_for_test(original_model, dataset_has_batch_size) quantization_params = {} if quantization_params is None else quantization_params - post_training_quantization = PostTrainingQuantization(subset_size=1, 
**quantization_params) + post_training_quantization = create_ptq_pipeline(subset_size=1, **quantization_params) quantized_model = post_training_quantization.run(original_model, dataset) return quantized_model diff --git a/tests/onnx/quantization/test_ptq_params.py b/tests/onnx/quantization/test_ptq_params.py index b53e9dc1448..961271699b8 100644 --- a/tests/onnx/quantization/test_ptq_params.py +++ b/tests/onnx/quantization/test_ptq_params.py @@ -25,7 +25,7 @@ from nncf.onnx.statistics.collectors import ONNXMinMaxStatisticCollector from nncf.parameters import TargetDevice from nncf.quantization.algorithms.min_max.onnx_backend import ONNXMinMaxAlgoBackend -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype from tests.common.quantization.metatypes import LinearTestMetatype @@ -49,7 +49,7 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter @pytest.mark.parametrize("target_device", TargetDevice) def test_target_device(target_device): - pipeline = PostTrainingQuantization(target_device=target_device) + pipeline = create_ptq_pipeline(target_device=target_device) min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = ONNXMinMaxAlgoBackend() assert min_max_algo._target_device == target_device diff --git a/tests/openvino/native/quantization/test_ptq_params.py b/tests/openvino/native/quantization/test_ptq_params.py index 436f16ed919..05c15b6c098 100644 --- a/tests/openvino/native/quantization/test_ptq_params.py +++ b/tests/openvino/native/quantization/test_ptq_params.py @@ -27,7 +27,7 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.parameters import TargetDevice from nncf.quantization.algorithms.min_max.openvino_backend import OVMinMaxAlgoBackend -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype from tests.common.quantization.metatypes import LinearTestMetatype @@ -49,7 +49,7 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter # pylint: disable=protected-access @pytest.mark.parametrize("target_device", [TargetDevice.CPU, TargetDevice.GPU, TargetDevice.VPU]) def test_target_device(target_device): - pipelines = PostTrainingQuantization(target_device=target_device) + pipelines = create_ptq_pipeline(target_device=target_device) min_max_algo = pipelines.pipeline_steps[-1][0] min_max_algo._backend_entity = OVMinMaxAlgoBackend() assert min_max_algo._target_device.value == HW_CONFIG_TYPE_TARGET_DEVICE_MAP[target_device.value] diff --git a/tests/post_training/test_templates/test_bias_correction.py b/tests/post_training/test_templates/test_bias_correction.py index 8e9d64e7e47..8bc4e271030 100644 --- a/tests/post_training/test_templates/test_bias_correction.py +++ b/tests/post_training/test_templates/test_bias_correction.py @@ -20,7 +20,8 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection from nncf.quantization.algorithms.bias_correction.backend import BiasCorrectionAlgoBackend -from nncf.quantization.pipelines.post_training.pipeline import 
PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline +from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline from tests.post_training.test_templates.helpers import ConvTestModel from tests.post_training.test_templates.helpers import MultipleConvTestModel from tests.post_training.test_templates.helpers import SplittedModel @@ -93,8 +94,8 @@ def map_references(ref_biases: Dict) -> Dict[str, List]: return ref_biases @staticmethod - def get_quantization_algorithm(disable_bias_correction=False) -> PostTrainingQuantization: - return PostTrainingQuantization( + def get_quantization_algorithm(disable_bias_correction=False) -> StepwisePipeline: + return create_ptq_pipeline( subset_size=1, fast_bias_correction=False, advanced_parameters=AdvancedQuantizationParameters( diff --git a/tests/post_training/test_templates/test_fast_bias_correction.py b/tests/post_training/test_templates/test_fast_bias_correction.py index 77163a018ec..c0b5779254c 100644 --- a/tests/post_training/test_templates/test_fast_bias_correction.py +++ b/tests/post_training/test_templates/test_fast_bias_correction.py @@ -18,7 +18,7 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from tests.post_training.test_templates.helpers import ConvBNTestModel from tests.post_training.test_templates.helpers import ConvTestModel from tests.post_training.test_templates.helpers import get_static_dataset @@ -96,7 +96,7 @@ def check_bias(model: TModel, ref_bias: list): @staticmethod def get_quantization_algorithm(): - return PostTrainingQuantization( + return create_ptq_pipeline( subset_size=1, fast_bias_correction=True, advanced_parameters=AdvancedQuantizationParameters(overflow_fix=OverflowFix.DISABLE), diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 287bf886854..567b348f84a 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -31,7 +31,7 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.passes import transform_to_inference_graph -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from tests.common.quantization.metatypes import Conv2dTestMetatype @@ -131,7 +131,7 @@ def metatypes_mapping(self): "range_estimator_params", [RangeEstimatorParametersSet.MINMAX, RangeEstimatorParametersSet.MEAN_MINMAX, None] ) def test_range_estimator_per_tensor(self, test_params, range_estimator_params): - pipeline = PostTrainingQuantization( + pipeline = create_ptq_pipeline( advanced_parameters=AdvancedQuantizationParameters( activations_range_estimator_params=range_estimator_params ) @@ -161,7 +161,7 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): 
@pytest.mark.parametrize("quantize_outputs", [False, True]) def test_quantize_outputs(self, test_params, quantize_outputs): - pipeline = PostTrainingQuantization( + pipeline = create_ptq_pipeline( advanced_parameters=AdvancedQuantizationParameters(quantize_outputs=quantize_outputs) ) min_max_algo = pipeline.pipeline_steps[-1][0] @@ -189,7 +189,7 @@ def test_quantize_outputs(self, test_params, quantize_outputs): def test_ignored_scopes(self, test_params, ignored_scopes_data): ignored_scope, act_num_ref, weight_num_ref = ignored_scopes_data - pipeline = PostTrainingQuantization(ignored_scope=ignored_scope) + pipeline = create_ptq_pipeline(ignored_scope=ignored_scope) min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() assert min_max_algo._ignored_scope == ignored_scope @@ -215,7 +215,7 @@ def test_ignored_scopes(self, test_params, ignored_scopes_data): @pytest.mark.parametrize("model_type", [ModelType.TRANSFORMER]) def test_model_type_pass(self, test_params, model_type): - pipeline = PostTrainingQuantization(preset=QuantizationPreset.MIXED, model_type=model_type) + pipeline = create_ptq_pipeline(preset=QuantizationPreset.MIXED, model_type=model_type) min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() diff --git a/tests/post_training/test_templates/test_quantizer_config.py b/tests/post_training/test_templates/test_quantizer_config.py index 28f309dbf7e..cb771f678ed 100644 --- a/tests/post_training/test_templates/test_quantizer_config.py +++ b/tests/post_training/test_templates/test_quantizer_config.py @@ -32,7 +32,7 @@ from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.passes import transform_to_inference_graph -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.quantization.range_estimator import RangeEstimatorParametersSet from tests.post_training.test_templates.models import NNCFGraphToTest from tests.post_training.test_templates.models import NNCFGraphToTestDepthwiseConv @@ -81,7 +81,7 @@ def statistic_collector_parameters(self, request) -> TestGetStatisticsCollectorP pass def test_default_quantizer_config(self, single_conv_nncf_graph): - pipeline = PostTrainingQuantization() + pipeline = create_ptq_pipeline() min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = single_conv_nncf_graph.nncf_graph @@ -127,7 +127,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( signed_activations, single_conv_nncf_graph, ): - pipeline = PostTrainingQuantization( + pipeline = create_ptq_pipeline( preset=preset, advanced_parameters=AdvancedQuantizationParameters( activations_quantization_params=QuantizationParameters( @@ -179,7 +179,7 @@ def test_quantizer_config_from_ptq_params_for_CPU( assert quantization_point.qconfig.signedness_to_force == signed_activations def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph): - pipeline = PostTrainingQuantization() + pipeline = create_ptq_pipeline() min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = self.get_algo_backend() nncf_graph = depthwise_conv_nncf_graph.nncf_graph @@ -223,7 +223,7 @@ def test_get_stat_collector( statistic_collector_parameters: TestGetStatisticsCollectorParameters, ): params = 
statistic_collector_parameters - pipeline = PostTrainingQuantization( + pipeline = create_ptq_pipeline( advanced_parameters=AdvancedQuantizationParameters( activations_range_estimator_params=range_estimator_params ) diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py index 5f56ec9e1b4..848247a3274 100644 --- a/tests/post_training/test_templates/test_smooth_quant.py +++ b/tests/post_training/test_templates/test_smooth_quant.py @@ -24,7 +24,7 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant from nncf.quantization.algorithms.smooth_quant.backend import SmoothQuantAlgoBackend -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from tests.post_training.test_templates.helpers import LinearMultiShapeModel from tests.post_training.test_templates.helpers import NonZeroLinearModel from tests.post_training.test_templates.helpers import get_static_dataset @@ -75,7 +75,7 @@ def get_matmul_metatype(): @staticmethod def get_quantization_algorithm(): - return PostTrainingQuantization( + return create_ptq_pipeline( subset_size=1, model_type=ModelType.TRANSFORMER, advanced_parameters=AdvancedQuantizationParameters( diff --git a/tests/torch/ptq/test_fq_params_calculation.py b/tests/torch/ptq/test_fq_params_calculation.py index 08f1869f2ce..90a34963bf4 100644 --- a/tests/torch/ptq/test_fq_params_calculation.py +++ b/tests/torch/ptq/test_fq_params_calculation.py @@ -18,7 +18,7 @@ import nncf from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.torch.model_creation import create_nncf_network from nncf.torch.nncf_network import NNCFNetwork from nncf.torch.quantization.layers import QUANTIZATION_MODULES @@ -48,7 +48,7 @@ def transform_fn(sample): dataset = nncf.Dataset(dataloader, transform_func=transform_fn) - post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) + post_training_quantization = create_ptq_pipeline(subset_size=1, **quantization_params) original_model.eval() nncf_network = create_nncf_network(original_model, config) quantized_model = post_training_quantization.run(nncf_network, dataset) diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index c8dc9d2103b..12d46910f7c 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -16,7 +16,7 @@ from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.torch.layers import NNCF_RNN from nncf.torch.layers import LSTMCellNNCF from tests.post_training.test_templates.helpers import EmbeddingModel @@ -98,7 +98,7 @@ def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_p nncf_network = get_nncf_network(model, desc.input_sample_sizes) quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(disable_bias_correction=True) - 
quantization_pipeline = PostTrainingQuantization(**quantization_parameters) + quantization_pipeline = create_ptq_pipeline(**quantization_parameters) quantized_model = quantization_pipeline.run(nncf_network, dataset=None) check_graph(quantized_model.nncf.get_graph(), desc.dot_filename, graph_dir) diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py index 2bb2002eda8..4ab018d4b41 100644 --- a/tests/torch/ptq/test_ptq_params.py +++ b/tests/torch/ptq/test_ptq_params.py @@ -25,7 +25,7 @@ from nncf.quantization.advanced_parameters import QuantizationMode from nncf.quantization.advanced_parameters import QuantizationParameters from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend -from nncf.quantization.pipelines.post_training.pipeline import PostTrainingQuantization +from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline from nncf.quantization.range_estimator import RangeEstimatorParametersSet from nncf.scopes import IgnoredScope from nncf.torch.graph.graph import PTTargetPoint @@ -94,7 +94,7 @@ def forward(self, x): @pytest.mark.parametrize("target_device", TargetDevice) def test_target_device(target_device): - pipeline = PostTrainingQuantization(target_device=target_device) + pipeline = create_ptq_pipeline(target_device=target_device) min_max_algo = pipeline.pipeline_steps[-1][0] min_max_algo._backend_entity = PTMinMaxAlgoBackend() assert min_max_algo._target_device == target_device From 96e8d76ad5a684e496e4487f4b51ce0bbb99004a Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Thu, 21 Sep 2023 11:54:16 +0100 Subject: [PATCH 6/7] Remove StepwisePipeline class --- .../hyperparameter_tuner/param_grid.py | 4 +- .../hyperparameter_tuner/pipeline.py | 15 +- nncf/quantization/pipelines/pipeline.py | 169 ++++++++++++++-- .../pipelines/post_training/pipeline.py | 6 +- .../pipelines/stepwise_pipeline.py | 182 ------------------ .../test_templates/test_bias_correction.py | 4 +- 6 files changed, 172 insertions(+), 208 deletions(-) delete mode 100644 nncf/quantization/pipelines/stepwise_pipeline.py diff --git a/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py index 958f93f9b50..d9af782cdfe 100644 --- a/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py +++ b/nncf/quantization/pipelines/hyperparameter_tuner/param_grid.py @@ -18,7 +18,7 @@ from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant -from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline +from nncf.quantization.pipelines.pipeline import Pipeline from nncf.quantization.range_estimator import AggregatorType from nncf.quantization.range_estimator import RangeEstimatorParameters from nncf.quantization.range_estimator import StatisticsCollectorParameters @@ -89,7 +89,7 @@ def _get_bias_correction_param_grid() -> ParamGrid: return {"fast_bias_correction": [True, False]} -def get_quantization_param_grids(pipeline: StepwisePipeline) -> List[ParamGrid]: +def get_quantization_param_grids(pipeline: Pipeline) -> List[ParamGrid]: """ Returns params grid for post-training quantization algorithm. 
""" diff --git a/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py index 85407b8d38e..4bc755f6d1f 100644 --- a/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py +++ b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py @@ -28,11 +28,10 @@ from nncf.quantization.algorithms.accuracy_control.rank_functions import create_normalized_mse_func from nncf.quantization.algorithms.accuracy_control.subset_selection import select_subset from nncf.quantization.pipelines.pipeline import Pipeline -from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline -from nncf.quantization.pipelines.stepwise_pipeline import collect_statistics -from nncf.quantization.pipelines.stepwise_pipeline import get_statistic_points -from nncf.quantization.pipelines.stepwise_pipeline import run_pipeline_from_step -from nncf.quantization.pipelines.stepwise_pipeline import run_pipeline_step +from nncf.quantization.pipelines.pipeline import collect_statistics +from nncf.quantization.pipelines.pipeline import get_statistic_points +from nncf.quantization.pipelines.pipeline import run_pipeline_from_step +from nncf.quantization.pipelines.pipeline import run_pipeline_step TModel = TypeVar("TModel") TTensor = TypeVar("TTensor") @@ -180,7 +179,7 @@ def find_best_combination( return best_combination_key -class HyperparameterTuner(Pipeline): +class HyperparameterTuner: """ This algorithm is used to find a best combination of parameters from `param_grid`. @@ -219,7 +218,7 @@ class HyperparameterTuner(Pipeline): def __init__( self, - pipeline_cls: Type[StepwisePipeline], + pipeline_cls: Type[Pipeline], init_params: Dict[str, Any], param_grids: List[Dict[str, List[Any]]], calibration_dataset: Dataset, @@ -255,7 +254,7 @@ def __init__( self._error_fn = None # Will be initialized inside `_prepare_pipeline_step()` method - self._pipelines: Dict[CombinationKey, StepwisePipeline] = {} + self._pipelines: Dict[CombinationKey, Pipeline] = {} self._step_index_to_statistics: Dict[int, StatisticPointsContainer] = {} self._calculated_scores: Dict[CombinationKey, float] = {} diff --git a/nncf/quantization/pipelines/pipeline.py b/nncf/quantization/pipelines/pipeline.py index 770d89d34d7..5416829fdbf 100644 --- a/nncf/quantization/pipelines/pipeline.py +++ b/nncf/quantization/pipelines/pipeline.py @@ -9,30 +9,177 @@ # See the License for the specific language governing permissions and # limitations under the License. -from abc import ABC -from abc import abstractmethod -from typing import TypeVar +from typing import Dict, List, Optional, TypeVar, Union +from nncf.common.factory import NNCFGraphFactory +from nncf.common.factory import StatisticsAggregatorFactory +from nncf.common.graph.graph import NNCFGraph +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.data.dataset import Dataset +from nncf.quantization.algorithms.algorithm import Algorithm TModel = TypeVar("TModel") +PipelineStep = List[Algorithm] -class Pipeline(ABC): +def get_statistic_points(pipeline_step: PipelineStep, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: """ - A base class for creating pipelines that apply algorithms to a model. + TODO - This abstract class serves as an interface for creating custom model - processing pipelines that encapsulate a series of algorithms to be - applied to a model using a provided dataset. 
+ :param pipeline_step: + :param model: + :param graph: + :return: """ + container = StatisticPointsContainer() + for algorithm in pipeline_step: + for statistic_points in algorithm.get_statistic_points(model, graph).values(): + for statistic_point in statistic_points: + container.add_statistic_point(statistic_point) + + return container + + +def collect_statistics( + containers: Union[StatisticPointsContainer, List[StatisticPointsContainer]], + model: TModel, + graph: NNCFGraph, + dataset: Dataset, +) -> StatisticPointsContainer: + """ + TODO: + + :param statistic_points: + :param model: + :param graph: + :param dataset: + :return: + """ + if not isinstance(containers, list): + containers = [containers] + + statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) + for container in containers: + statistics_aggregator.register_statistic_points(container) + statistics_aggregator.collect_statistics(model, graph) + + return statistics_aggregator.statistic_points + + +class Pipeline: + """ + A class for creating pipelines that apply algorithms to a model. + + This class is used for creating custom model processing pipelines + that encapsulate a series of algorithms to be applied to a model + using a provided dataset. + + A pipeline consists of pipeline steps. Each pipeline step is a + sequence of Algorithm class instances whose statistic points are + combined and collected using the model obtained after the previous + pipeline step. The collected statistic points are used for all + algorithms in this step. + """ + + def __init__(self, pipeline_steps: List[PipelineStep]): + """ + :param pipeline_steps: A sequence of pipeline steps to be executed in order. + """ + self._pipeline_steps = pipeline_steps + + @property + def pipeline_steps(self) -> List[PipelineStep]: + """ + Property that defines the sequence of distinct pipeline steps to + be executed in order. + + :return: A sequence of pipeline steps to be executed in order. + """ + return self._pipeline_steps - @abstractmethod def run(self, model: TModel, dataset: Dataset) -> TModel: """ - Abstract method that defines the sequence of algorithms to be - applied to the provided model using the provided dataset. + Executes the pipeline on the provided model. :param model: A model to which pipeline will be applied. :param dataset: A dataset that holds the data items for algorithms. + :return: The updated model after executing the entire pipeline. """ + return run_pipeline_from_step(self, model, dataset) + + +def run_pipeline_step( + pipeline_step: PipelineStep, + pipeline_step_statistics: StatisticPointsContainer, + model: TModel, + graph: NNCFGraph, +) -> TModel: + """ + Executes a provided pipeline step on the provided model. + + :param pipeline_step: A sequence of algorithms representing a pipeline step. + :param pipeline_step_statistics: Statistics required to execute a pipeline step. + :param model: A model to which a pipeline step will be applied. + :param graph: A graph assosiated with a model. + :return: The updated model after executing the pipeline step. 
+ """ + current_model = model + current_graph = graph + + for algorithm in pipeline_step[:-1]: + current_model = algorithm.apply(current_model, current_graph, pipeline_step_statistics) + current_graph = NNCFGraphFactory.create(current_model) + current_model = pipeline_step[-1].apply(current_model, current_graph, pipeline_step_statistics) + + return current_model + + +def run_pipeline_from_step( + pipeline: Pipeline, + model: TModel, + dataset: Dataset, + graph: Optional[NNCFGraph] = None, + start_step_index: int = 0, + step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None, +) -> TModel: + """ + Execute the pipeline from the specified pipeline step to the end. + + :param pipeline: A pipeline part of which should be executed. + :param model: This is the model after the (start_step_index - 1)-th pipeline + step, or the initial model if start_step_index is 0. + :param dataset: A dataset that holds the data items for pipeline steps. + :param graph: A graph assosiated with a model. + :param start_step_index: Zero-based pipeline step index from which the pipeline + should be executed. + :param step_index_to_statistics: A mapping from pipeline step index to statistics + required to execute pipeline step. + :return: The updated model after executing the pipeline from the specified pipeline + step to the end. + """ + if step_index_to_statistics is None: + step_index_to_statistics = {} + + # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step + step_model = model + step_graph = graph + step_index = start_step_index + + for pipeline_step in pipeline.pipeline_steps[start_step_index:]: + # Create graph required to run current pipeline step + if step_graph is None: + step_graph = NNCFGraphFactory.create(step_model) + + # Collect statistics required to run current pipeline step + step_statistics = step_index_to_statistics.get(step_index) + if step_statistics is None: + statistic_points = get_statistic_points(pipeline_step, step_model, step_graph) + step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) + + # Run current pipeline step + step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph) + + step_graph = None # We should rebuild the graph for the next pipeline step + step_index += 1 + + return step_model diff --git a/nncf/quantization/pipelines/post_training/pipeline.py b/nncf/quantization/pipelines/post_training/pipeline.py index 16fc47f86f9..d908847269f 100644 --- a/nncf/quantization/pipelines/post_training/pipeline.py +++ b/nncf/quantization/pipelines/post_training/pipeline.py @@ -23,7 +23,7 @@ from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant -from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline +from nncf.quantization.pipelines.pipeline import Pipeline from nncf.scopes import IgnoredScope TModel = TypeVar("TModel") @@ -37,7 +37,7 @@ def create_ptq_pipeline( model_type: Optional[ModelType] = None, ignored_scope: Optional[IgnoredScope] = None, advanced_parameters: Optional[AdvancedQuantizationParameters] = None, -) -> StepwisePipeline: +) -> Pipeline: """ Creates a post-training quantization pipeline. 
@@ -136,4 +136,4 @@ def create_ptq_pipeline( ) ) - return StepwisePipeline(pipeline_steps) + return Pipeline(pipeline_steps) diff --git a/nncf/quantization/pipelines/stepwise_pipeline.py b/nncf/quantization/pipelines/stepwise_pipeline.py deleted file mode 100644 index e38db709b53..00000000000 --- a/nncf/quantization/pipelines/stepwise_pipeline.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Dict, List, Optional, TypeVar, Union - -from nncf.common.factory import NNCFGraphFactory -from nncf.common.factory import StatisticsAggregatorFactory -from nncf.common.graph.graph import NNCFGraph -from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer -from nncf.data.dataset import Dataset -from nncf.quantization.algorithms.algorithm import Algorithm -from nncf.quantization.pipelines.pipeline import Pipeline - -TModel = TypeVar("TModel") -PipelineStep = List[Algorithm] - - -def get_statistic_points(pipeline_step: PipelineStep, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: - """ - TODO - - :param pipeline_step: - :param model: - :param graph: - :return: - """ - container = StatisticPointsContainer() - for algorithm in pipeline_step: - for statistic_points in algorithm.get_statistic_points(model, graph).values(): - for statistic_point in statistic_points: - container.add_statistic_point(statistic_point) - - return container - - -def collect_statistics( - containers: Union[StatisticPointsContainer, List[StatisticPointsContainer]], - model: TModel, - graph: NNCFGraph, - dataset: Dataset, -) -> StatisticPointsContainer: - """ - TODO: - - :param statistic_points: - :param model: - :param graph: - :param dataset: - :return: - """ - if not isinstance(containers, list): - containers = [containers] - - statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset) - for container in containers: - statistics_aggregator.register_statistic_points(container) - statistics_aggregator.collect_statistics(model, graph) - - return statistics_aggregator.statistic_points - - -class StepwisePipeline(Pipeline): - """ - A class for creating sequential model processing pipelines with distinct steps. - - This class extends the base `Pipeline` class to provide access to each distinct - step of the pipeline. Each pipeline step is a sequence of `Algorithm` class - instances whose statistic points are combained and collected using the model - that was obtained after previous pipeline step. Collected statistic points are - used for all algorothms in this step. - """ - - def __init__(self, pipeline_steps: List[PipelineStep]): - """ - :param pipeline_steps: A sequence of pipeline steps to be executed in order. - """ - self._pipeline_steps = pipeline_steps - - @property - def pipeline_steps(self) -> List[PipelineStep]: - """ - Property that defines the sequence of distinct pipeline steps to - be executed in order. - - :return: A sequence of pipeline steps to be executed in order. 
- """ - return self._pipeline_steps - - def run(self, model: TModel, dataset: Dataset) -> TModel: - """ - TODO: - - :param model: A model to which pipeline will be applied. - :param dataset: A dataset that holds the data items for algorithms. - :return: The updated model after executing the entire pipeline. - """ - return run_pipeline_from_step(self, model, dataset) - - -def run_pipeline_step( - pipeline_step: PipelineStep, - pipeline_step_statistics: StatisticPointsContainer, - model: TModel, - graph: NNCFGraph, -) -> TModel: - """ - Executes a provided pipeline step on the provided model. - - :param pipeline_step: A sequence of algorithms representing a pipeline step. - :param pipeline_step_statistics: Statistics required to execute a pipeline step. - :param model: A model to which a pipeline step will be applied. - :param graph: A graph assosiated with a model. - :return: The updated model after executing the pipeline step. - """ - current_model = model - current_graph = graph - - for algorithm in pipeline_step[:-1]: - current_model = algorithm.apply(current_model, current_graph, pipeline_step_statistics) - current_graph = NNCFGraphFactory.create(current_model) - current_model = pipeline_step[-1].apply(current_model, current_graph, pipeline_step_statistics) - - return current_model - - -def run_pipeline_from_step( - pipeline: StepwisePipeline, - model: TModel, - dataset: Dataset, - graph: Optional[NNCFGraph] = None, - start_step_index: int = 0, - step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None, -) -> TModel: - """ - Execute the pipeline from the specified pipeline step to the end. - - :param pipeline: A pipeline part of which should be executed. - :param model: This is the model after the (start_step_index - 1)-th pipeline - step, or the initial model if start_step_index is 0. - :param dataset: A dataset that holds the data items for pipeline steps. - :param graph: A graph assosiated with a model. - :param start_step_index: Zero-based pipeline step index from which the pipeline - should be executed. - :param step_index_to_statistics: A mapping from pipeline step index to statistics - required to execute pipeline step. - :return: The updated model after executing the pipeline from the specified pipeline - step to the end. 
- """ - if step_index_to_statistics is None: - step_index_to_statistics = {} - - # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step - step_model = model - step_graph = graph - step_index = start_step_index - - for pipeline_step in pipeline.pipeline_steps[start_step_index:]: - # Create graph required to run current pipeline step - if step_graph is None: - step_graph = NNCFGraphFactory.create(step_model) - - # Collect statistics required to run current pipeline step - step_statistics = step_index_to_statistics.get(step_index) - if step_statistics is None: - statistic_points = get_statistic_points(pipeline_step, step_model, step_graph) - step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) - - # Run current pipeline step - step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph) - - step_graph = None # We should rebuild the graph for the next pipeline step - step_index += 1 - - return step_model diff --git a/tests/post_training/test_templates/test_bias_correction.py b/tests/post_training/test_templates/test_bias_correction.py index 8bc4e271030..e6546ede1fa 100644 --- a/tests/post_training/test_templates/test_bias_correction.py +++ b/tests/post_training/test_templates/test_bias_correction.py @@ -20,8 +20,8 @@ from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection from nncf.quantization.algorithms.bias_correction.backend import BiasCorrectionAlgoBackend +from nncf.quantization.pipelines.pipeline import Pipeline from nncf.quantization.pipelines.post_training.pipeline import create_ptq_pipeline -from nncf.quantization.pipelines.stepwise_pipeline import StepwisePipeline from tests.post_training.test_templates.helpers import ConvTestModel from tests.post_training.test_templates.helpers import MultipleConvTestModel from tests.post_training.test_templates.helpers import SplittedModel @@ -94,7 +94,7 @@ def map_references(ref_biases: Dict) -> Dict[str, List]: return ref_biases @staticmethod - def get_quantization_algorithm(disable_bias_correction=False) -> StepwisePipeline: + def get_quantization_algorithm(disable_bias_correction=False) -> Pipeline: return create_ptq_pipeline( subset_size=1, fast_bias_correction=False, From 37c8666beddc4087b3ebfcf6a05e99664356362a Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Thu, 21 Sep 2023 12:28:12 +0100 Subject: [PATCH 7/7] Add additional methods to Pipeline class --- .../hyperparameter_tuner/pipeline.py | 27 +-- nncf/quantization/pipelines/pipeline.py | 171 +++++++++--------- 2 files changed, 94 insertions(+), 104 deletions(-) diff --git a/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py index 4bc755f6d1f..f8304757f7f 100644 --- a/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py +++ b/nncf/quantization/pipelines/hyperparameter_tuner/pipeline.py @@ -29,9 +29,6 @@ from nncf.quantization.algorithms.accuracy_control.subset_selection import select_subset from nncf.quantization.pipelines.pipeline import Pipeline from nncf.quantization.pipelines.pipeline import collect_statistics -from nncf.quantization.pipelines.pipeline import get_statistic_points -from nncf.quantization.pipelines.pipeline import run_pipeline_from_step -from nncf.quantization.pipelines.pipeline import run_pipeline_step TModel = TypeVar("TModel") TTensor = TypeVar("TTensor") @@ -290,10 +287,10 @@ def run(self, 
model: TModel, dataset: Dataset) -> TModel:
             if not step_param_grid:
                 # TODO(andrey-churkin): Think about how it can be avoided.
                 params = apply_combination(self._init_params, best_settings)
-                pipeline_step = self._pipeline_cls(**params).pipeline_steps[step_index]
-                container = get_statistic_points(pipeline_step, step_model, step_graph)
+                pipeline = self._pipeline_cls(**params)
+                container = pipeline.get_statistic_points_for_step(step_index, step_model, step_graph)
                 step_statistics = collect_statistics(container, step_model, step_graph, self._calibration_dataset)
-                step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph)
+                step_model = pipeline.run_step(step_index, step_statistics, step_model, step_graph)
                 continue
 
             step_combinations = create_combinations(step_param_grid)
@@ -318,9 +315,9 @@ def run(self, model: TModel, dataset: Dataset) -> TModel:
             )
             best_settings.update(step_combinations[step_best_combination_key])
 
-            pipeline_step = self._pipelines[step_best_combination_key].pipeline_steps[step_index]
-            step_model = run_pipeline_step(
-                pipeline_step, self._step_index_to_statistics[step_index], step_model, step_graph
+            pipeline = self._pipelines[step_best_combination_key]
+            step_model = pipeline.run_step(
+                step_index, self._step_index_to_statistics[step_index], step_model, step_graph
             )
 
         # TODO(andrey-churkin): Show final best settings
@@ -357,7 +354,7 @@ def _prepare_pipeline_step(
         # Collect statistics required to execute `step_index`-th pipeline step
         containers = [
-            get_statistic_points(pipeline.pipeline_steps[step_index], step_model, step_graph)
+            pipeline.get_statistic_points_for_step(step_index, step_model, step_graph)
             for pipeline in self._pipelines.values()
         ]
         self._step_index_to_statistics[step_index] = collect_statistics(
@@ -386,13 +383,9 @@ def _calculate_combination_score(
         if combination_key in self._calculated_scores:
             return self._calculated_scores[combination_key]
 
-        model = run_pipeline_from_step(
-            self._pipelines[combination_key],
-            step_model,
-            self._calibration_dataset,
-            step_graph,
-            step_index,
-            self._step_index_to_statistics,
+        pipeline = self._pipelines[combination_key]
+        model = pipeline.run_from_step(
+            step_model, self._calibration_dataset, step_graph, step_index, self._step_index_to_statistics
         )
 
         score = self._validate_model(model, dataset, subset_indices)
diff --git a/nncf/quantization/pipelines/pipeline.py b/nncf/quantization/pipelines/pipeline.py
index 5416829fdbf..497d726c53b 100644
--- a/nncf/quantization/pipelines/pipeline.py
+++ b/nncf/quantization/pipelines/pipeline.py
@@ -22,24 +22,6 @@ PipelineStep = List[Algorithm]
 
 
-def get_statistic_points(pipeline_step: PipelineStep, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
-    """
-    Returns the combined statistic points of all algorithms in the provided pipeline step.
-
-    :param pipeline_step: A sequence of algorithms representing a pipeline step.
-    :param model: A model for which the statistic points are collected.
-    :param graph: A graph associated with the model.
-    :return: A container with the combined statistic points of all algorithms in the step.
-    """
-    container = StatisticPointsContainer()
-    for algorithm in pipeline_step:
-        for statistic_points in algorithm.get_statistic_points(model, graph).values():
-            for statistic_point in statistic_points:
-                container.add_statistic_point(statistic_point)
-
-    return container
-
-
 def collect_statistics(
     containers: Union[StatisticPointsContainer, List[StatisticPointsContainer]],
     model: TModel,
@@ -105,81 +87,96 @@ def run(self, model: TModel, dataset: Dataset) -> TModel:
         :param dataset: A dataset that holds the data items for algorithms.
         :return: The updated model after executing the entire pipeline.
         """
-        return run_pipeline_from_step(self, model, dataset)
-
-
-def run_pipeline_step(
-    pipeline_step: PipelineStep,
-    pipeline_step_statistics: StatisticPointsContainer,
-    model: TModel,
-    graph: NNCFGraph,
-) -> TModel:
-    """
-    Executes a provided pipeline step on the provided model.
-
-    :param pipeline_step: A sequence of algorithms representing a pipeline step.
-    :param pipeline_step_statistics: Statistics required to execute a pipeline step.
-    :param model: A model to which a pipeline step will be applied.
-    :param graph: A graph associated with a model.
-    :return: The updated model after executing the pipeline step.
-    """
-    current_model = model
-    current_graph = graph
-
-    for algorithm in pipeline_step[:-1]:
-        current_model = algorithm.apply(current_model, current_graph, pipeline_step_statistics)
-        current_graph = NNCFGraphFactory.create(current_model)
-    current_model = pipeline_step[-1].apply(current_model, current_graph, pipeline_step_statistics)
-
-    return current_model
-
-
-def run_pipeline_from_step(
-    pipeline: Pipeline,
-    model: TModel,
-    dataset: Dataset,
-    graph: Optional[NNCFGraph] = None,
-    start_step_index: int = 0,
-    step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None,
-) -> TModel:
-    """
-    Execute the pipeline from the specified pipeline step to the end.
-
-    :param pipeline: A pipeline, part of which should be executed.
-    :param model: This is the model after the (start_step_index - 1)-th pipeline
-        step, or the initial model if start_step_index is 0.
-    :param dataset: A dataset that holds the data items for pipeline steps.
-    :param graph: A graph associated with a model.
-    :param start_step_index: Zero-based pipeline step index from which the pipeline
-        should be executed.
-    :param step_index_to_statistics: A mapping from pipeline step index to statistics
-        required to execute the pipeline step.
-    :return: The updated model after executing the pipeline from the specified pipeline
-        step to the end.
-    """
-    if step_index_to_statistics is None:
-        step_index_to_statistics = {}
-
-    # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step
-    step_model = model
-    step_graph = graph
-    step_index = start_step_index
-
-    for pipeline_step in pipeline.pipeline_steps[start_step_index:]:
-        # Create graph required to run current pipeline step
-        if step_graph is None:
-            step_graph = NNCFGraphFactory.create(step_model)
-
-        # Collect statistics required to run current pipeline step
-        step_statistics = step_index_to_statistics.get(step_index)
-        if step_statistics is None:
-            statistic_points = get_statistic_points(pipeline_step, step_model, step_graph)
-            step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset)
-
-        # Run current pipeline step
-        step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph)
-
-        step_graph = None  # We should rebuild the graph for the next pipeline step
-        step_index += 1
-
-    return step_model
+        return self.run_from_step(model, dataset)
+
+    def run_step(
+        self,
+        step_index: int,
+        step_statistics: StatisticPointsContainer,
+        model: TModel,
+        graph: NNCFGraph,
+    ) -> TModel:
+        """
+        Executes a provided pipeline step on the provided model.
+
+        :param step_index: Zero-based index of the pipeline step to execute.
+        :param step_statistics: Statistics required to execute a pipeline step.
+        :param model: A model to which a pipeline step will be applied.
+        :param graph: A graph associated with a model.
+        :return: The updated model after executing the pipeline step.
+        """
+        current_model = model
+        current_graph = graph
+
+        pipeline_step = self.pipeline_steps[step_index]
+        for algorithm in pipeline_step[:-1]:
+            current_model = algorithm.apply(current_model, current_graph, step_statistics)
+            current_graph = NNCFGraphFactory.create(current_model)
+        current_model = pipeline_step[-1].apply(current_model, current_graph, step_statistics)
+
+        return current_model
+
+    def run_from_step(
+        self,
+        model: TModel,
+        dataset: Dataset,
+        graph: Optional[NNCFGraph] = None,
+        start_step_index: int = 0,
+        step_index_to_statistics: Optional[Dict[int, StatisticPointsContainer]] = None,
+    ) -> TModel:
+        """
+        Executes the pipeline from the specified pipeline step to the end.
+
+        :param model: This is the model after the (start_step_index - 1)-th pipeline
+            step, or the initial model if start_step_index is 0.
+        :param dataset: A dataset that holds the data items for pipeline steps.
+        :param graph: A graph associated with a model.
+        :param start_step_index: Zero-based pipeline step index from which the pipeline
+            should be executed.
+        :param step_index_to_statistics: A mapping from pipeline step index to statistics
+            required to execute the pipeline step.
+        :return: The updated model after executing the pipeline from the specified pipeline
+            step to the end.
+        """
+        if step_index_to_statistics is None:
+            step_index_to_statistics = {}
+
+        # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step
+        step_model = model
+        step_graph = graph
+        for step_index in range(start_step_index, len(self.pipeline_steps)):
+            # Create graph required to run current pipeline step
+            if step_graph is None:
+                step_graph = NNCFGraphFactory.create(step_model)
+
+            # Collect statistics required to run current pipeline step
+            step_statistics = step_index_to_statistics.get(step_index)
+            if step_statistics is None:
+                statistic_points = self.get_statistic_points_for_step(step_index, step_model, step_graph)
+                step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset)
+
+            # Run current pipeline step
+            step_model = self.run_step(step_index, step_statistics, step_model, step_graph)
+
+            step_graph = None  # We should rebuild the graph for the next pipeline step
+
+        return step_model
+
+    def get_statistic_points_for_step(
+        self, step_index: int, model: TModel, graph: NNCFGraph
+    ) -> StatisticPointsContainer:
+        """
+        Returns the combined statistic points of all algorithms in the given pipeline step.
+
+        :param step_index: Zero-based index of the pipeline step.
+        :param model: A model for which the statistic points are collected.
+        :param graph: A graph associated with the model.
+        :return: A container with the combined statistic points of all algorithms in the step.
+        """
+        container = StatisticPointsContainer()
+        for algorithm in self.pipeline_steps[step_index]:
+            for statistic_points in algorithm.get_statistic_points(model, graph).values():
+                for statistic_point in statistic_points:
+                    container.add_statistic_point(statistic_point)
+
+        return container
- """ - if step_index_to_statistics is None: - step_index_to_statistics = {} - - # The `step_model` and `step_graph` entities are required to execute `step_index`-th pipeline step - step_model = model - step_graph = graph - step_index = start_step_index + step_graph = None # We should rebuild the graph for the next pipeline step - for pipeline_step in pipeline.pipeline_steps[start_step_index:]: - # Create graph required to run current pipeline step - if step_graph is None: - step_graph = NNCFGraphFactory.create(step_model) + return step_model - # Collect statistics required to run current pipeline step - step_statistics = step_index_to_statistics.get(step_index) - if step_statistics is None: - statistic_points = get_statistic_points(pipeline_step, step_model, step_graph) - step_statistics = collect_statistics(statistic_points, step_model, step_graph, dataset) - - # Run current pipeline step - step_model = run_pipeline_step(pipeline_step, step_statistics, step_model, step_graph) + def get_statistic_points_for_step( + self, step_index: int, model: TModel, graph: NNCFGraph + ) -> StatisticPointsContainer: + """ + TODO - step_graph = None # We should rebuild the graph for the next pipeline step - step_index += 1 + :param pipeline_step: + :param model: + :param graph: + :return: + """ + container = StatisticPointsContainer() + for algorithm in self.pipeline_steps[step_index]: + for statistic_points in algorithm.get_statistic_points(model, graph).values(): + for statistic_point in statistic_points: + container.add_statistic_point(statistic_point) - return step_model + return container