From def800966d92fc412bdef25a1298ddf28fa8d6a4 Mon Sep 17 00:00:00 2001 From: Aleksei Kashapov Date: Mon, 25 Sep 2023 10:21:18 +0200 Subject: [PATCH] [OV] Introduce support of quantization If operation (#2101) ### Changes Add support of quantization for OpenVINO models with If operation - https://docs.openvino.ai/2023.0/openvino_docs_ops_infrastructure_If_8.html ### Reason for changes Customer request / Obtain the maximum performance for models with inner subgraphs under If operation ### Related tickets 113826 ### Tests Tested on a model attached to a ticket Add synthetic model with If operation --- nncf/common/logging/track_progress.py | 3 + .../graph/metatypes/openvino_metatypes.py | 6 + nncf/openvino/graph/model_transformer.py | 43 +++ nncf/openvino/graph/node_utils.py | 21 ++ .../graph/transformations/commands.py | 38 +++ .../openvino/quantization/quantize_ifmodel.py | 307 ++++++++++++++++++ nncf/openvino/quantization/quantize_model.py | 62 ++++ .../algorithms/min_max/algorithm.py | 10 +- .../quantized/IfModel_else.dot | 51 +++ .../quantized/IfModel_main.dot | 11 + .../quantized/IfModel_then.dot | 51 +++ tests/openvino/native/models.py | 20 ++ .../native/quantization/test_graphs.py | 42 +++ 13 files changed, 662 insertions(+), 3 deletions(-) create mode 100644 nncf/openvino/quantization/quantize_ifmodel.py create mode 100644 tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot create mode 100644 tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot create mode 100644 tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot diff --git a/nncf/common/logging/track_progress.py b/nncf/common/logging/track_progress.py index d623464bf9c..9827bc72085 100644 --- a/nncf/common/logging/track_progress.py +++ b/nncf/common/logging/track_progress.py @@ -114,6 +114,9 @@ def __init__( TimeRemainingColumn(), ) ) + + disable = disable or (hasattr(sequence, "__len__") and len(sequence) == 0) + self.progress = Progress( 
*self.columns, auto_refresh=auto_refresh, diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py index 15d1a7648d1..73b18efd3aa 100644 --- a/nncf/openvino/graph/metatypes/openvino_metatypes.py +++ b/nncf/openvino/graph/metatypes/openvino_metatypes.py @@ -673,6 +673,12 @@ class OVAbsMetatype(OVOpMetatype): op_names = ["Abs"] +@OV_OPERATOR_METATYPES.register() +class OVIfMetatype(OVOpMetatype): + name = "IfOp" + op_names = ["If"] + + @OV_OPERATOR_METATYPES.register() class OVGroupNormalizationMetatype(OVOpMetatype): name = "GroupNormalizationOp" diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index 6a06d2519e9..19e43f4b131 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -25,12 +25,14 @@ from nncf.openvino.graph.node_utils import get_result_node_name from nncf.openvino.graph.transformations.commands import OVBiasCorrectionCommand from nncf.openvino.graph.transformations.commands import OVBiasInsertionCommand +from nncf.openvino.graph.transformations.commands import OVExtractIfBodyCommand from nncf.openvino.graph.transformations.commands import OVFQNodeRemovingCommand from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand from nncf.openvino.graph.transformations.commands import OVModelExtractionCommand from nncf.openvino.graph.transformations.commands import OVMultiplyInsertionCommand from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand from nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand +from nncf.openvino.graph.transformations.commands import OVUpdateIfBodyCommand from nncf.openvino.graph.transformations.commands import OVWeightUpdateCommand from nncf.quantization.fake_quantize import FakeQuantizeParameters @@ -52,6 +54,8 @@ def __init__(self, model: TModel): (OVOutputInsertionCommand, 
self._apply_output_insertion_transformations), (OVBiasInsertionCommand, self._apply_bias_insertion_transformations), (OVMultiplyInsertionCommand, self._apply_multiply_insertion_transformations), + (OVUpdateIfBodyCommand, self._apply_update_if_body_transformations), + (OVExtractIfBodyCommand, self._apply_extract_if_body_transformation), ] @staticmethod @@ -526,3 +530,42 @@ def _apply_multiply_insertion_transformations( destination_port.replace_source_output(multiply_node.output(0)) return model + + @staticmethod + def _apply_update_if_body_transformations( + model: ov.Model, transformations: List[OVUpdateIfBodyCommand] + ) -> ov.Model: + """ + Update model body for IF node. + + :param model: Model to update and insert a new subgraph. + :param transformations: Transformations with information of If node and an updated subgraph. + :return: Original model with an updated subgraph. + """ + name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) + for transformation in transformations: + subgraph_model = transformation.subgraph_model + port_id = transformation.target_point.port_id + node_name = transformation.target_point.target_node_name + node = name_to_node_mapping[node_name] + node.set_function(port_id, subgraph_model) + return model + + @staticmethod + def _apply_extract_if_body_transformation( + model: ov.Model, transformations: List[OVExtractIfBodyCommand] + ) -> ov.Model: + """ + Extract a model body from If node. + + :param model: Model from which extracts a subgraph. + :param transformations: Transformations with information from which + If node and input port extract a model subgraph. + :return: Model subgraph. 
+ """ + transformation = transformations[-1] + name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) + ov_node = name_to_node_mapping[transformation.if_node_name] + if transformation.if_body_condition: + return ov.Model(ov_node.get_function(0)) # ticket: 121115 + return ov.Model(ov_node.get_function(1)) # ticket: 121115 diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 0a97c826376..9c9d41137cf 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -23,7 +23,9 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVAddMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConstantMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvertMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype InplaceInsertionFnType = Callable[[ov.Node, int], ov.Node] @@ -49,6 +51,25 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: return bias_constant is not None +def get_number_if_op(model: ov.Model) -> int: + """ + Returns the number of If operations in a model. + + :param model: Model. + :return: Number of If operations in the model, counting those nested inside If bodies. + """ + + def cnt_if_op(model: ov.Model, cnt: int) -> int: + for op in model.get_ops(): + if get_node_metatype(op) == OVIfMetatype: + cnt += 1 + cnt = cnt_if_op(op.get_function(0), cnt) + cnt = cnt_if_op(op.get_function(1), cnt) + return cnt + + return cnt_if_op(model, 0) + + def get_const_value(const_node: ov.Node) -> np.ndarray: """ Returns the constant tensor for the node.
diff --git a/nncf/openvino/graph/transformations/commands.py b/nncf/openvino/graph/transformations/commands.py index 232b9209ca5..491515aa0f5 100644 --- a/nncf/openvino/graph/transformations/commands.py +++ b/nncf/openvino/graph/transformations/commands.py @@ -12,6 +12,7 @@ from typing import List import numpy as np +import openvino.runtime as ov from nncf.common.graph.transformations.commands import Command from nncf.common.graph.transformations.commands import TargetPoint @@ -191,3 +192,40 @@ def __init__( def union(self, other: "TransformationCommand") -> "TransformationCommand": # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand raise NotImplementedError() + + +class OVUpdateIfBodyCommand(TransformationCommand): + """ + Updates If node body. + """ + + def __init__(self, target_point: OVTargetPoint, body_model: ov.Model): + """ + :param target_point: The TargetPoint instance for the change that contains layer's information. + :param body_model: A new model to set. + """ + super().__init__(TransformationType.CHANGE, target_point) + self.subgraph_model = body_model + + def union(self, other: "TransformationCommand") -> "TransformationCommand": + # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand + raise NotImplementedError() + + +class OVExtractIfBodyCommand(Command): + """ + Extracts If node body. + """ + + def __init__(self, if_node_name: str, if_body_condition: bool): + """ + :param if_node_name: Name of the If node whose body is extracted. + :param if_body_condition: If True, extracts the then body; otherwise, the else body.
+ """ + super().__init__(TransformationType.EXTRACT) + self.if_node_name = if_node_name + self.if_body_condition = if_body_condition + + def union(self, other: "TransformationCommand") -> "TransformationCommand": + # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand + raise NotImplementedError() diff --git a/nncf/openvino/quantization/quantize_ifmodel.py b/nncf/openvino/quantization/quantize_ifmodel.py new file mode 100644 index 00000000000..bf30e3bed91 --- /dev/null +++ b/nncf/openvino/quantization/quantize_ifmodel.py @@ -0,0 +1,307 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from itertools import islice +from typing import List, Optional, Tuple + +import openvino.runtime as ov + +from nncf import Dataset +from nncf.common import factory +from nncf.common.engine import Engine +from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.model_transformer import ModelTransformer +from nncf.common.graph.operator_metatypes import OperatorMetatype +from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.logging import nncf_logger +from nncf.common.logging.track_progress import track +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer +from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype +from nncf.openvino.graph.node_utils import get_number_if_op +from nncf.openvino.graph.transformations.commands import OVExtractIfBodyCommand +from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand +from nncf.openvino.graph.transformations.commands import OVTargetPoint +from nncf.openvino.graph.transformations.commands import OVUpdateIfBodyCommand +from nncf.quantization.algorithms.algorithm import Algorithm + + +def _make_dataset_for_if_bodies( + engine: Engine, + calibration_dataset: Dataset, + if_cond_input_name: str, + then_model_input_names: List[str], + else_model_input_names: List[str], + subset_size: int, +) -> Tuple[Dataset, Dataset]: + """ + Returns dataset for a then and else bodies of If node. + + :param engine: Engine to infer parent model to obtain dataitems for a child dataset. + :param calibration_dataset: Dataset to infer parent model. + :param if_cond_input_name: Input name of If node condition. + :param then_model_input_names: Names of inputs for then body + (should be in the order of passing them to a model). 
+ :param else_model_input_names: Names of inputs for else body + (should be in the order of passing them to a model). + :param subset_size: Maximum number of calibration_dataset items to use. + :return: Datasets for the then and else bodies, in that order. + """ + + then_dataset, else_dataset = [], [] + calibration_dataset_size = ( + min(subset_size, calibration_dataset.get_length()) + if calibration_dataset.get_length() is not None + else subset_size + ) + for input_data in track( + islice(calibration_dataset.get_inference_data(), calibration_dataset_size), + total=calibration_dataset_size, + description="Collecting the dataset for then and else bodies:", + ): + data_item = [] + results = engine.infer(input_data) + if results[if_cond_input_name]: + for name in then_model_input_names: + data_item.append(results[name]) + then_dataset.append(data_item) + else: + for name in else_model_input_names: + data_item.append(results[name]) + else_dataset.append(data_item) + nncf_logger.info(f"The length of dataset for then body is {len(then_dataset)}, else body is {len(else_dataset)}.") + return Dataset(then_dataset), Dataset(else_dataset) + + +def _extract_if_body(model_transformer: ModelTransformer, if_node: NNCFNode, if_body_condition: bool) -> ov.Model: + """ + Returns a body of If node based on the value of if_body_condition. + + :param model_transformer: ModelTransformer instance. + :param if_node: If node. + :param if_body_condition: If True returns then body of If node, otherwise - else body. + :return: If body. + """ + transformation_layout = TransformationLayout() + command = OVBackend.create_extract_if_body_command(if_node, if_body_condition) + transformation_layout.register(command) + return model_transformer.transform(transformation_layout) + + +def _update_if_body( + model_transformer: ModelTransformer, if_node: NNCFNode, if_body_condition: bool, body: ov.Model +) -> ov.Model: + """ + Update body of If node, based on if_body_condition. + + :param model_transformer: ModelTransformer instance.
+ :param if_node: If node. + :param if_body_condition: Condition of If node body. + :param body: New body. + :return: Updated model with a new body of If node. + """ + transformation_layout = TransformationLayout() + command = OVBackend.create_update_body_command(if_node, if_body_condition, body) + transformation_layout.register(command) + return model_transformer.transform(transformation_layout) + + +def _add_outputs_before_if_node(model_transformer: ModelTransformer, model: ov.Model, if_node: NNCFNode) -> ov.Model: + """ + Inserts extra outputs on If node inputs. + + :param model_transformer: ModelTransformer instance. + :param model: Model instance. + :param if_node: If node. + :return: Model with extra outputs before If node. + """ + assert if_node.metatype == OVIfMetatype + transformation_layout = TransformationLayout() + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + port_ids = range(len(ov_node.inputs())) + for port_id in port_ids: + transformation_layout.register( + OVOutputInsertionCommand(OVTargetPoint(TargetType.PRE_LAYER_OPERATION, if_node.node_name, port_id)) + ) + return model_transformer.transform(transformation_layout) + + +def apply_algorithm_if_bodies( + algorithm: Algorithm, + parent_model: ov.Model, + parent_graph: NNCFGraph, + parent_dataset: Dataset, + subset_size: int, + current_model_num: int, + all_models_num: int, + parent_statistic_points: Optional[StatisticPointsContainer] = None, +) -> Tuple[ov.Model, int]: + """ + Applies an algorithm recursively to the model and to each body of its If nodes. + + :param parent_model: Model to apply algorithm. + :param parent_graph: Graph of a model. + :param parent_dataset: Dataset for algorithm. + :param subset_size: Size of a dataset to use for calibration. + :param current_model_num: Current model number. + :param all_models_num: Total number of models. + :param parent_statistic_points: Statistics points for algorithm.
+ :return: A model for every bodies of If nodes the algorithm was applied and the latest model number. + """ + nncf_logger.info(f"Iteration [{current_model_num}/{all_models_num}] ...") + quantized_model = algorithm.apply(parent_model, parent_graph, parent_statistic_points, parent_dataset) + if get_number_if_op(parent_model) == 0: + return quantized_model, current_model_num + model_transformer_fp32 = factory.ModelTransformerFactory.create(parent_model) + for if_node in parent_graph.get_nodes_by_metatypes(OVBackend.if_node_metatypes()): + then_model_input_names = OVBackend.get_if_body_input_names(parent_model, if_node, True) + else_model_input_names = OVBackend.get_if_body_input_names(parent_model, if_node, False) + if_cond_input_name = OVBackend.get_if_cond_input_name(parent_model, if_node) + parent_model_with_additional_outputs = _add_outputs_before_if_node( + model_transformer_fp32, parent_model, if_node + ) + then_dataset, else_dataset = _make_dataset_for_if_bodies( + factory.EngineFactory.create(parent_model_with_additional_outputs), + parent_dataset, + if_cond_input_name, + then_model_input_names, + else_model_input_names, + subset_size, + ) + then_model = _extract_if_body(model_transformer_fp32, if_node, True) + else_model = _extract_if_body(model_transformer_fp32, if_node, False) + then_quantized_model, current_model_num = apply_algorithm_if_bodies( + algorithm, + then_model, + NNCFGraphFactory.create(then_model), + then_dataset, + subset_size, + current_model_num + 1, + all_models_num, + ) + else_quantized_model, current_model_num = apply_algorithm_if_bodies( + algorithm, + else_model, + NNCFGraphFactory.create(else_model), + else_dataset, + subset_size, + current_model_num + 1, + all_models_num, + ) + model_transformer_int8 = factory.ModelTransformerFactory.create(quantized_model) + quantized_model = _update_if_body(model_transformer_int8, if_node, True, then_quantized_model) + model_transformer_int8 = factory.ModelTransformerFactory.create(quantized_model) 
+ quantized_model = _update_if_body(model_transformer_int8, if_node, False, else_quantized_model) + return quantized_model, current_model_num + + +class OVBackend: + @staticmethod + def _get_if_body_port_id(if_body_condition: bool): + """ + Returns port id of a If body based on if_body_condition. + + :param if_body_condition: Condition of If node. + :return: Port id of body of If node. + """ + return int(not if_body_condition) + + @staticmethod + def if_node_metatypes() -> List[OperatorMetatype]: + """ + Returns metatypes that map to If node. + + :return: Metatypes mapped to If node. + """ + return [OVIfMetatype] + + @staticmethod + def get_if_body_input_names(model: ov.Model, if_node: NNCFNode, if_body_condition: bool) -> List[str]: + """ + Returns input names of If node body based on if_body_condition. + The order of inputs are in a way that they are passed to the model during inference. + + :param model: Original model. + :param if_node: If node. + :param if_body_condition: True for then body, else for else body. + :return: Input names of If body. + """ + input_names = [] + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + input_indices = [ + desc.input_index + for desc in ov_node.get_input_descriptions(OVBackend._get_if_body_port_id(if_body_condition)) + ] + input_names.extend([ov_node.input_values()[index].any_name for index in input_indices]) + return input_names + + @staticmethod + def get_if_cond_input_name(model: ov.Model, if_node: NNCFNode) -> str: + """ + Returns name of condition input of If node. + + :param model: Model. + :param if_node: If node. + :return: Name of condition input of If node. 
+ """ + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + return ov_node.input_values()[0].any_name + + @staticmethod + def create_update_body_command(if_node: NNCFNode, if_body_condition: bool, body: ov.Model) -> OVUpdateIfBodyCommand: + """ + Returns a command for setting a body of If node by a new one. + + :param if_node: If node. + :param if_body_condition: Condition of If node. + :param body: A new body to set. + :return: Command to update If node body. + """ + target_point = OVTargetPoint( + TargetType.LAYER, if_node.node_name, OVBackend._get_if_body_port_id(if_body_condition) + ) + return OVUpdateIfBodyCommand(target_point, body) + + @staticmethod + def create_extract_if_body_command(if_node: NNCFNode, if_body_condition: bool) -> OVExtractIfBodyCommand: + """ + Returns a command for extraction body of If node. + If if_body_condition is True, extract then body, otherwise - else body. + + :param if_node: If node. + :param if_body_condition: Condition of body of If node. + :return: Extracted body of If node. 
+ """ + return OVExtractIfBodyCommand(if_node.node_name, if_body_condition) + + @staticmethod + def create_output_insertion_commands(model: ov.Model, if_node: NNCFNode) -> List[OVOutputInsertionCommand]: + """ + Returns output insertion commands for every input port of If node. + + :param model: Model that contains the If node. + :param if_node: If node. + :return: Output insertion commands, one per input port of the If node. + """ + assert if_node.metatype == OVIfMetatype + commands = [] + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + for port_id in range(len(ov_node.inputs())): + commands.append( + OVOutputInsertionCommand(OVTargetPoint(TargetType.PRE_LAYER_OPERATION, if_node.node_name, port_id)) + ) + return commands diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index 2faac879d86..fba0d2853cc 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -20,8 +20,10 @@ from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.openvino.graph.nncf_graph_builder import GraphConverter +from nncf.openvino.graph.node_utils import get_number_if_op from nncf.openvino.quantization.backend_parameters import BackendParameters from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed +from nncf.openvino.quantization.quantize_ifmodel import apply_algorithm_if_bodies from nncf.openvino.quantization.weights_compression import insert_pre_compression_operations from nncf.parameters import DropType from nncf.parameters import ModelType @@ -91,6 +93,64 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No nncf_logger.debug(f"Unable to dump optimization parameters due to error: {e}") +@tracked_function(NNCF_OV_CATEGORY, [CompressionStartedWithQuantizeApi(), "target_device", "preset"]) +def native_quantize_if_op_impl( + model: ov.Model, + calibration_dataset: Dataset, +
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + advanced_parameters: Optional[AdvancedQuantizationParameters] = None, +) -> ov.Model: + """ + Implementation of the `quantize()` method for the OpenVINO backend via the OpenVINO Runtime API. + """ + if not fast_bias_correction: + raise NotImplementedError( + "The BiasCorrection algorithm is not supported for OpenVINO models with If operation." + ) + quantization_algorithm = PostTrainingQuantization( + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, + ) + + graph = GraphConverter.create_nncf_graph(model) + if_ops_number = get_number_if_op(model) + all_models_number = if_ops_number * 2 + 1 + nncf_logger.info( + f"The model consists of {if_ops_number} If node(-s) with then and else bodies. \ + Main model and all If bodies will be quantized recursively." 
+ ) + quantized_model, _ = apply_algorithm_if_bodies( + quantization_algorithm, model, graph, calibration_dataset, subset_size, 1, all_models_number + ) + + if is_weight_compression_needed(advanced_parameters): + compress_quantize_weights_transformation(quantized_model) + + dump_parameters( + quantized_model, + { + "preset": preset.value, + "target_device": target_device.value, + "subset_size": subset_size, + "fast_bias_correction": fast_bias_correction, + "model_type": model_type, + "ignored_scope": ignored_scope, + "advanced_parameters": convert_to_dict_recursively(advanced_parameters), + }, + ) + return quantized_model + + @tracked_function(NNCF_OV_CATEGORY, [CompressionStartedWithQuantizeApi(), "target_device", "preset"]) def native_quantize_impl( model: ov.Model, @@ -297,6 +357,8 @@ def quantize_impl( quantize_fn = pot_quantize_impl else: quantize_fn = native_quantize_impl + if get_number_if_op(model) > 0: + quantize_fn = native_quantize_if_op_impl return quantize_fn( model, diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 72718bbeb68..861e0226873 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -165,13 +165,16 @@ def __init__( quantizer_group, preset, self._quantization_params[quantizer_group] ) + self._reset_cache() + self._algorithm_key = f"MMQ_{hash(self)}" + + def _reset_cache(self): # It prevents the duplicate weight quantizers from being added. # It can happen when you have layers that share the identical weight tensor. 
self._quantization_target_points_to_qconfig = ( collections.OrderedDict() ) # type: OrderedDict[TargetPoint, QuantizerConfig] self._unified_scale_groups = [] - self._algorithm_key = f"MMQ_{hash(self)}" @property def available_backends(self) -> Dict[str, BackendType]: @@ -695,13 +698,14 @@ def filter_func(point: StatisticPoint) -> bool: graph, quantization_target_point, qconfig, parameters ) transformation_layout.register(command) - + if not transformation_layout.transformations: + nncf_logger.info("The model has no operations to apply quantization.") quantized_model = model_transformer.transform(transformation_layout) return quantized_model def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) - + self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() for quantization_target_point, qconfig in quantization_target_points.items(): diff --git a/tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot b/tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot new file mode 100644 index 00000000000..10e35270838 --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot @@ -0,0 +1,51 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Sub" [id=2, type=Subtract]; +"3 Add" [id=3, type=Add]; +"4 Sub/fq_output_0" [id=4, type=FakeQuantize]; +"5 Mul" [id=5, type=Multiply]; +"6 Conv" [id=6, type=Convolution]; +"7 Transpose" [id=7, type=Transpose]; +"8 Conv_Add" [id=8, type=Add]; +"9 Concat_70" [id=9, type=Concat]; +"10 Relu" [id=10, type=Relu]; +"11 Result" [id=11, type=Result]; +"12 Constant_68" [id=12, type=Constant]; +"13 Constant_66" [id=13, type=Constant]; +"14 Constant_64" [id=14, type=Constant]; +"15 Bias" [id=15, type=Constant]; +"16 Conv/fq_weights_1" [id=16, type=Multiply]; +"17 Constant_12068" [id=17, 
type=Constant]; +"18 Convert_12170" [id=18, type=Convert]; +"19 Constant_58" [id=19, type=Constant]; +"20 Constant_9391" [id=20, type=Constant]; +"21 Constant_9390" [id=21, type=Constant]; +"22 Constant_9389" [id=22, type=Constant]; +"23 Constant_9388" [id=23, type=Constant]; +"24 Constant_56" [id=24, type=Constant]; +"0 Input_1" -> "2 Sub" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"2 Sub" -> "4 Sub/fq_output_0" [label="[1, 3, 4, 2]", style=solid]; +"3 Add" -> "5 Mul" [label="[1, 3, 2, 4]", style=solid]; +"4 Sub/fq_output_0" -> "6 Conv" [label="[1, 3, 4, 2]", style=solid]; +"5 Mul" -> "7 Transpose" [label="[1, 3, 2, 4]", style=solid]; +"6 Conv" -> "8 Conv_Add" [label="[1, 3, 4, 2]", style=solid]; +"7 Transpose" -> "9 Concat_70" [label="[1, 3, 4, 2]", style=solid]; +"8 Conv_Add" -> "10 Relu" [label="[1, 3, 4, 2]", style=solid]; +"9 Concat_70" -> "11 Result" [label="[2, 3, 4, 2]", style=solid]; +"10 Relu" -> "9 Concat_70" [label="[1, 3, 4, 2]", style=solid]; +"12 Constant_68" -> "7 Transpose" [label="[4]", style=dashed]; +"13 Constant_66" -> "5 Mul" [label="[1, 3, 1, 1]", style=solid]; +"14 Constant_64" -> "3 Add" [label="[1, 3, 1, 1]", style=solid]; +"15 Bias" -> "8 Conv_Add" [label="[1, 3, 1, 1]", style=solid]; +"16 Conv/fq_weights_1" -> "6 Conv" [label="[3, 3, 1, 1]", style=solid]; +"17 Constant_12068" -> "16 Conv/fq_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"18 Convert_12170" -> "16 Conv/fq_weights_1" [label="[3, 3, 1, 1]", style=solid]; +"19 Constant_58" -> "18 Convert_12170" [label="[3, 3, 1, 1]", style=dashed]; +"20 Constant_9391" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"21 Constant_9390" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"22 Constant_9389" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"23 Constant_9388" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"24 Constant_56" -> "2 Sub" [label="[1, 3, 1, 1]", style=solid]; +} diff --git 
a/tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot b/tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot new file mode 100644 index 00000000000..33223cf313e --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot @@ -0,0 +1,11 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Cond_input" [id=2, type=Parameter]; +"3 If_72" [id=3, type=If]; +"4 Result" [id=4, type=Result]; +"0 Input_1" -> "3 If_72" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 If_72" [label="[1, 3, 2, 4]", style=solid]; +"2 Cond_input" -> "3 If_72" [label="[]", style=dashed]; +"3 If_72" -> "4 Result" [label="[2, 3, 4, 2]", style=solid]; +} diff --git a/tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot b/tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot new file mode 100644 index 00000000000..732b69959db --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot @@ -0,0 +1,51 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Sub" [id=2, type=Subtract]; +"3 Add" [id=3, type=Add]; +"4 Sub/fq_output_0" [id=4, type=FakeQuantize]; +"5 Mul" [id=5, type=Multiply]; +"6 Conv" [id=6, type=Convolution]; +"7 Transpose" [id=7, type=Transpose]; +"8 Conv_Add" [id=8, type=Add]; +"9 Concat_36" [id=9, type=Concat]; +"10 Relu" [id=10, type=Relu]; +"11 Result" [id=11, type=Result]; +"12 Constant_34" [id=12, type=Constant]; +"13 Constant_32" [id=13, type=Constant]; +"14 Constant_30" [id=14, type=Constant]; +"15 Bias" [id=15, type=Constant]; +"16 Conv/fq_weights_1" [id=16, type=Multiply]; +"17 Constant_11914" [id=17, type=Constant]; +"18 Convert_12016" [id=18, type=Convert]; +"19 Constant_24" [id=19, type=Constant]; +"20 Constant_4685" [id=20, type=Constant]; +"21 Constant_4684" [id=21, type=Constant]; +"22 Constant_4683" [id=22, type=Constant]; +"23 Constant_4682" 
[id=23, type=Constant]; +"24 Constant_22" [id=24, type=Constant]; +"0 Input_1" -> "2 Sub" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"2 Sub" -> "4 Sub/fq_output_0" [label="[1, 3, 4, 2]", style=solid]; +"3 Add" -> "5 Mul" [label="[1, 3, 2, 4]", style=solid]; +"4 Sub/fq_output_0" -> "6 Conv" [label="[1, 3, 4, 2]", style=solid]; +"5 Mul" -> "7 Transpose" [label="[1, 3, 2, 4]", style=solid]; +"6 Conv" -> "8 Conv_Add" [label="[1, 3, 4, 2]", style=solid]; +"7 Transpose" -> "9 Concat_36" [label="[1, 3, 4, 2]", style=solid]; +"8 Conv_Add" -> "10 Relu" [label="[1, 3, 4, 2]", style=solid]; +"9 Concat_36" -> "11 Result" [label="[2, 3, 4, 2]", style=solid]; +"10 Relu" -> "9 Concat_36" [label="[1, 3, 4, 2]", style=solid]; +"12 Constant_34" -> "7 Transpose" [label="[4]", style=dashed]; +"13 Constant_32" -> "5 Mul" [label="[1, 3, 1, 1]", style=solid]; +"14 Constant_30" -> "3 Add" [label="[1, 3, 1, 1]", style=solid]; +"15 Bias" -> "8 Conv_Add" [label="[1, 3, 1, 1]", style=solid]; +"16 Conv/fq_weights_1" -> "6 Conv" [label="[3, 3, 1, 1]", style=solid]; +"17 Constant_11914" -> "16 Conv/fq_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"18 Convert_12016" -> "16 Conv/fq_weights_1" [label="[3, 3, 1, 1]", style=solid]; +"19 Constant_24" -> "18 Convert_12016" [label="[3, 3, 1, 1]", style=dashed]; +"20 Constant_4685" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"21 Constant_4684" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"22 Constant_4683" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"23 Constant_4682" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"24 Constant_22" -> "2 Sub" [label="[1, 3, 1, 1]", style=solid]; +} diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index 8d02accb0cc..6440636ea50 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -704,3 +704,23 @@ def _create_ov_model(self): result.get_output_tensor(0).set_names(set(["Result"])) 
model = ov.Model([result], [input_1]) return model + + +class IfModel(OVReferenceModel): + def _create_ov_model(self): + input_1 = opset.parameter([1, 3, 4, 2], name="Input_1") + input_2 = opset.parameter([1, 3, 2, 4], name="Input_2") + input_3 = opset.parameter([], dtype=bool, name="Cond_input") + + then_body = ConvModel().ov_model + else_body = ConvModel().ov_model + + if_node = opset.if_op(input_3) + if_node.set_then_body(then_body) + if_node.set_else_body(else_body) + if_node.set_input(input_1.outputs()[0], then_body.get_parameters()[0], else_body.get_parameters()[0]) + if_node.set_input(input_2.outputs()[0], then_body.get_parameters()[1], else_body.get_parameters()[1]) + if_node.set_output(then_body.results[0], else_body.results[0]) + result = opset.result(if_node, name="Result") + model = ov.Model([result], [input_1, input_2, input_3]) + return model diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index b761aa68b68..e7913870285 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -12,11 +12,14 @@ from typing import Dict +import numpy as np import openvino.runtime as ov import pytest +from nncf import Dataset from nncf.common.quantization.structs import QuantizationPreset from nncf.openvino.graph.nncf_graph_builder import GraphConverter +from nncf.openvino.quantization.quantize_model import quantize_impl from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.parameters import ModelType from nncf.parameters import TargetDevice @@ -30,6 +33,7 @@ from tests.openvino.native.models import DepthwiseConv4DModel from tests.openvino.native.models import DepthwiseConv5DModel from tests.openvino.native.models import GRUSequenceModel +from tests.openvino.native.models import IfModel from tests.openvino.native.models import MatmulSoftmaxMatmulBlock from tests.openvino.native.quantization.test_fq_params_calculation 
import quantize_model from tests.openvino.omz_helpers import convert_model @@ -153,3 +157,41 @@ def test_ignore_nodes_by_attribues(linear_before_reset): postfix = "T" if linear_before_reset else "F" path_ref_graph = QUANTIZED_REF_GRAPHS_DIR / f"GRUSequenceModel_linear_before_reset_{postfix}.dot" compare_nncf_graphs(quantized_model, path_ref_graph) + + +def get_dataset_for_if_model(model: ov.Model, size: int = 2) -> Dataset: + rng = np.random.default_rng(seed=0) + dataitems = [] + for i in range(size): + input_data = {} + for param in model.get_parameters(): + if param.get_element_type().get_type_name() == "boolean": + input_data[param.get_output_tensor(0).get_any_name()] = i < size // 2 + else: + input_shape = param.partial_shape.get_max_shape() + input_data[param.get_output_tensor(0).get_any_name()] = rng.uniform(0, 1, input_shape) + dataitems.append(input_data) + dataset = Dataset(dataitems) + return dataset + + +def test_if_model_fq_placement(): + if_model = IfModel() + ov_model = if_model.ov_model + dataset = get_dataset_for_if_model(ov_model) + quantized_model = quantize_impl( + ov_model, + dataset, + subset_size=2, + fast_bias_correction=True, + ) + if_ops = [op for op in quantized_model.get_ops() if op.get_type_name() == "If"] + assert len(if_ops) == 1 + if_op = if_ops[0] + main_model_path = if_model.ref_model_name + "_main.dot" + then_body_path = if_model.ref_model_name + "_then.dot" + else_body_path = if_model.ref_model_name + "_else.dot" + + compare_nncf_graphs(quantized_model, QUANTIZED_REF_GRAPHS_DIR / main_model_path) + compare_nncf_graphs(if_op.get_function(0), QUANTIZED_REF_GRAPHS_DIR / then_body_path) + compare_nncf_graphs(if_op.get_function(1), QUANTIZED_REF_GRAPHS_DIR / else_body_path)