From abf07e9a8fb109cdce9605334634cc073fd8b14f Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 11 Dec 2024 14:35:52 +0100 Subject: [PATCH 1/8] Change calculations --- nncf/openvino/graph/model_transformer.py | 82 +++++++++---------- .../fast_bias_correction/algorithm.py | 78 +++++++++++++----- .../fast_bias_correction/openvino_backend.py | 9 ++ 3 files changed, 108 insertions(+), 61 deletions(-) diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index a331b82314e..41d71705de9 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -558,50 +558,50 @@ def _apply_model_extraction_transformation( :return: Extracted sub-model. """ outputs_type = ov.Type.f32 - transformation = transformations[-1] name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) params, results = [], [] - for input_name, input_port_id in transformation.input_ids: - input_node = name_to_node_mapping[input_name] - if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: - params.append(input_node) - continue - - input_port = input_node.input(input_port_id) - input_type = input_port.get_element_type() - input_node_output = input_port.get_source_output() - parameter_name = get_parameter_node_name(input_name, input_port_id) - - new_param = opset.parameter( - shape=input_node_output.partial_shape, - dtype=outputs_type, - name=parameter_name, - ) - new_input = new_param.output(0) - - if input_type != outputs_type: - new_input = opset.convert(new_param, destination_type=input_type).output(0) - - input_port.replace_source_output(new_input) - new_param_tensors = [o.get_tensor() for o in new_param.outputs()] - OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) - params.append(new_param) - - for output_name, output_port_id in transformation.output_ids: - output_node = name_to_node_mapping[output_name] - - result_name = get_result_node_name(output_name, output_port_id) - output_port = output_node.output(output_port_id) - if output_port.get_element_type() != outputs_type: - output_port = opset.convert(output_node, destination_type=outputs_type).output(0) - new_result = opset.result(output_port, name=result_name) - result_tensor_names = [result_name] + list(output_port.get_names()) - OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) - results.append(new_result) - - if not results: - results = model.get_results() + for transformation in transformations: + for input_name, input_port_id in transformation.input_ids: + input_node = name_to_node_mapping[input_name] + if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: + params.append(input_node) + continue + + input_port = input_node.input(input_port_id) + input_type = input_port.get_element_type() + input_node_output = input_port.get_source_output() + parameter_name = get_parameter_node_name(input_name, input_port_id) + + new_param = opset.parameter( + shape=input_node_output.partial_shape, + dtype=outputs_type, + name=parameter_name, + ) + new_input = new_param.output(0) + + if input_type != outputs_type: + new_input = opset.convert(new_param, destination_type=input_type).output(0) + + input_port.replace_source_output(new_input) + new_param_tensors = [o.get_tensor() for o in new_param.outputs()] + OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) + params.append(new_param) + + for output_name, output_port_id in 
transformation.output_ids: + output_node = name_to_node_mapping[output_name] + + result_name = get_result_node_name(output_name, output_port_id) + output_port = output_node.output(output_port_id) + if output_port.get_element_type() != outputs_type: + output_port = opset.convert(output_node, destination_type=outputs_type).output(0) + new_result = opset.result(output_port, name=result_name) + result_tensor_names = [result_name] + list(output_port.get_names()) + OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) + results.append(new_result) + + if not results: + results.extend(model.get_results()) extracted_model = ov.Model(results, params) copy_rt_info(model, extracted_model, path=["nncf"]) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 3d104cad3c9..d1ea0204fe9 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -148,6 +148,38 @@ def apply( # for which we should update bias and new bias values. node_and_new_bias_value = [] + input_feed = {} + + me_transformation_layout = TransformationLayout() + for node, bias_value in track(node_and_bias_value, description="Applying Preparing step"): + node_name = node.node_name + + if not self._backend_entity.is_quantized_weights(node, graph): + nncf_logger.debug(f"Skipping node {node_name} because weights were not quantized") + continue + + in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics(node, graph) + input_port_id, _ = self._backend_entity.get_activation_port_ids_for_bias_node(node) + + input_id = (in_node_name, input_port_id) + output_id = (out_node_name, 0) + + model_extraction_command = self._backend_entity.model_extraction_command([input_id], [output_id]) + me_transformation_layout.register(model_extraction_command) + + input_fp, input_shape = self._get_fp_inputs(statistic_points, in_node_name) + sub_input_name = self._backend_entity.get_parameter_node_name(node_name, input_port_id) + + input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) + input_blob = self._backend_entity.create_input_data( + input_shape, input_fp, sub_input_name, input_channel_axis + ) + input_feed.update(input_blob) + + extracted_model = model_transformer.transform(me_transformation_layout) + engine = EngineFactory.create(extracted_model) + raw_output = engine.infer(input_feed) + for node, bias_value in track(node_and_bias_value, description="Applying Fast Bias correction"): node_name = node.node_name @@ -167,29 +199,35 @@ def apply( # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. 
output_id = (out_node_name, 0) - extracted_model = self._extract_submodel(model_transformer, input_id, output_id) - if extracted_model is None: - nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") - continue + sub_output_name = self._backend_entity.get_result_node_name(node_name, 0) + + # extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + # if extracted_model is None: + # nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") + # continue - sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) + # sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) output_channel_axis = node.metatype.output_channel_axis - input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) - if bias_value.ndim > 1: - # Make index positive - output_channel_axis = range(bias_value.ndim)[output_channel_axis] - input_channel_axis = range(bias_value.ndim)[input_channel_axis] - input_blob = self._backend_entity.create_input_data( - input_shape, input_fp, sub_input_name, input_channel_axis - ) - bias_shift = self._get_bias_shift( - model=extracted_model, - input_blob=input_blob, - output_channel_axis=output_channel_axis, - output_fp=output_fp, - output_name=sub_output_name, - ) + # input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) + # if bias_value.ndim > 1: + # # Make index positive + # output_channel_axis = range(bias_value.ndim)[output_channel_axis] + # input_channel_axis = range(bias_value.ndim)[input_channel_axis] + # input_blob = self._backend_entity.create_input_data( + # input_shape, input_fp, sub_input_name, input_channel_axis + # ) + # bias_shift = self._get_bias_shift( + # model=extracted_model, + # input_blob=input_blob, + # output_channel_axis=output_channel_axis, + # output_fp=output_fp, + # output_name=sub_output_name, + # ) + + q_outputs = self._backend_entity.process_model_output(raw_output, sub_output_name) + q_outputs = mean_per_channel(q_outputs, output_channel_axis) + bias_shift = fns.stack(output_fp) - q_outputs bias_shift = self._reshape_bias_shift(bias_shift, bias_value, output_channel_axis) updated_bias = bias_value + bias_shift diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 1f92559eeb8..c20bde6e905 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -29,6 +29,8 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.collectors import get_mean_statistic_collector from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend +from nncf.openvino.graph.node_utils import get_parameter_node_name +from nncf.openvino.graph.node_utils import get_result_node_name from nncf.tensor import Tensor @@ -59,6 +61,13 @@ def mean_statistic_collector( return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod + def get_parameter_node_name(node_name: str, port_id: int) -> str: + return get_parameter_node_name(node_name, port_id) + + @staticmethod + def get_result_node_name(node_name: str, port_id: int) -> str: + return get_result_node_name(node_name, port_id) + def get_sub_input_output_names(subgraph: 
ov.Model) -> Tuple[str, str]: return subgraph.inputs[0].get_any_name(), subgraph.outputs[0].get_any_name() From 1dbe08c34425c11d248bb6183c1634015bd34d43 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 12 Dec 2024 09:37:26 +0100 Subject: [PATCH 2/8] Revert "Change calculations" This reverts commit abf07e9a8fb109cdce9605334634cc073fd8b14f. --- nncf/openvino/graph/model_transformer.py | 82 +++++++++---------- .../fast_bias_correction/algorithm.py | 78 +++++------------- .../fast_bias_correction/openvino_backend.py | 9 -- 3 files changed, 61 insertions(+), 108 deletions(-) diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index 41d71705de9..a331b82314e 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -558,50 +558,50 @@ def _apply_model_extraction_transformation( :return: Extracted sub-model. """ outputs_type = ov.Type.f32 + transformation = transformations[-1] name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) params, results = [], [] - for transformation in transformations: - for input_name, input_port_id in transformation.input_ids: - input_node = name_to_node_mapping[input_name] - if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: - params.append(input_node) - continue - - input_port = input_node.input(input_port_id) - input_type = input_port.get_element_type() - input_node_output = input_port.get_source_output() - parameter_name = get_parameter_node_name(input_name, input_port_id) - - new_param = opset.parameter( - shape=input_node_output.partial_shape, - dtype=outputs_type, - name=parameter_name, - ) - new_input = new_param.output(0) - - if input_type != outputs_type: - new_input = opset.convert(new_param, destination_type=input_type).output(0) - - input_port.replace_source_output(new_input) - new_param_tensors = [o.get_tensor() for o in new_param.outputs()] - OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) - params.append(new_param) - - for output_name, output_port_id in transformation.output_ids: - output_node = name_to_node_mapping[output_name] - - result_name = get_result_node_name(output_name, output_port_id) - output_port = output_node.output(output_port_id) - if output_port.get_element_type() != outputs_type: - output_port = opset.convert(output_node, destination_type=outputs_type).output(0) - new_result = opset.result(output_port, name=result_name) - result_tensor_names = [result_name] + list(output_port.get_names()) - OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) - results.append(new_result) - - if not results: - results.extend(model.get_results()) + for input_name, input_port_id in transformation.input_ids: + input_node = name_to_node_mapping[input_name] + if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: + params.append(input_node) + continue + + input_port = input_node.input(input_port_id) + input_type = input_port.get_element_type() + input_node_output = input_port.get_source_output() + parameter_name = get_parameter_node_name(input_name, input_port_id) + + new_param = opset.parameter( + shape=input_node_output.partial_shape, + dtype=outputs_type, + name=parameter_name, + ) + new_input = new_param.output(0) + + if input_type != outputs_type: + new_input = opset.convert(new_param, destination_type=input_type).output(0) + + input_port.replace_source_output(new_input) + new_param_tensors = [o.get_tensor() for o 
in new_param.outputs()] + OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) + params.append(new_param) + + for output_name, output_port_id in transformation.output_ids: + output_node = name_to_node_mapping[output_name] + + result_name = get_result_node_name(output_name, output_port_id) + output_port = output_node.output(output_port_id) + if output_port.get_element_type() != outputs_type: + output_port = opset.convert(output_node, destination_type=outputs_type).output(0) + new_result = opset.result(output_port, name=result_name) + result_tensor_names = [result_name] + list(output_port.get_names()) + OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) + results.append(new_result) + + if not results: + results = model.get_results() extracted_model = ov.Model(results, params) copy_rt_info(model, extracted_model, path=["nncf"]) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index d1ea0204fe9..3d104cad3c9 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -148,38 +148,6 @@ def apply( # for which we should update bias and new bias values. node_and_new_bias_value = [] - input_feed = {} - - me_transformation_layout = TransformationLayout() - for node, bias_value in track(node_and_bias_value, description="Applying Preparing step"): - node_name = node.node_name - - if not self._backend_entity.is_quantized_weights(node, graph): - nncf_logger.debug(f"Skipping node {node_name} because weights were not quantized") - continue - - in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics(node, graph) - input_port_id, _ = self._backend_entity.get_activation_port_ids_for_bias_node(node) - - input_id = (in_node_name, input_port_id) - output_id = (out_node_name, 0) - - model_extraction_command = self._backend_entity.model_extraction_command([input_id], [output_id]) - me_transformation_layout.register(model_extraction_command) - - input_fp, input_shape = self._get_fp_inputs(statistic_points, in_node_name) - sub_input_name = self._backend_entity.get_parameter_node_name(node_name, input_port_id) - - input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) - input_blob = self._backend_entity.create_input_data( - input_shape, input_fp, sub_input_name, input_channel_axis - ) - input_feed.update(input_blob) - - extracted_model = model_transformer.transform(me_transformation_layout) - engine = EngineFactory.create(extracted_model) - raw_output = engine.infer(input_feed) - for node, bias_value in track(node_and_bias_value, description="Applying Fast Bias correction"): node_name = node.node_name @@ -199,35 +167,29 @@ def apply( # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. 
output_id = (out_node_name, 0) - sub_output_name = self._backend_entity.get_result_node_name(node_name, 0) - - # extracted_model = self._extract_submodel(model_transformer, input_id, output_id) - # if extracted_model is None: - # nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") - # continue + extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + if extracted_model is None: + nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") + continue - # sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) + sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) output_channel_axis = node.metatype.output_channel_axis - # input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) - # if bias_value.ndim > 1: - # # Make index positive - # output_channel_axis = range(bias_value.ndim)[output_channel_axis] - # input_channel_axis = range(bias_value.ndim)[input_channel_axis] - # input_blob = self._backend_entity.create_input_data( - # input_shape, input_fp, sub_input_name, input_channel_axis - # ) - # bias_shift = self._get_bias_shift( - # model=extracted_model, - # input_blob=input_blob, - # output_channel_axis=output_channel_axis, - # output_fp=output_fp, - # output_name=sub_output_name, - # ) - - q_outputs = self._backend_entity.process_model_output(raw_output, sub_output_name) - q_outputs = mean_per_channel(q_outputs, output_channel_axis) - bias_shift = fns.stack(output_fp) - q_outputs + input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) + if bias_value.ndim > 1: + # Make index positive + output_channel_axis = range(bias_value.ndim)[output_channel_axis] + input_channel_axis = range(bias_value.ndim)[input_channel_axis] + input_blob = self._backend_entity.create_input_data( + input_shape, input_fp, sub_input_name, input_channel_axis + ) + bias_shift = self._get_bias_shift( + model=extracted_model, + input_blob=input_blob, + output_channel_axis=output_channel_axis, + output_fp=output_fp, + output_name=sub_output_name, + ) bias_shift = self._reshape_bias_shift(bias_shift, bias_value, output_channel_axis) updated_bias = bias_value + bias_shift diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index c20bde6e905..1f92559eeb8 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -29,8 +29,6 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.collectors import get_mean_statistic_collector from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend -from nncf.openvino.graph.node_utils import get_parameter_node_name -from nncf.openvino.graph.node_utils import get_result_node_name from nncf.tensor import Tensor @@ -61,13 +59,6 @@ def mean_statistic_collector( return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod - def get_parameter_node_name(node_name: str, port_id: int) -> str: - return get_parameter_node_name(node_name, port_id) - - @staticmethod - def get_result_node_name(node_name: str, port_id: int) -> str: - return get_result_node_name(node_name, port_id) - def get_sub_input_output_names(subgraph: 
ov.Model) -> Tuple[str, str]: return subgraph.inputs[0].get_any_name(), subgraph.outputs[0].get_any_name() From 3a9a355bb648703ffafa03de89d0c625bc3aa7d6 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 12 Dec 2024 14:47:47 +0100 Subject: [PATCH 3/8] Improve mapping --- nncf/openvino/graph/model_builder.py | 90 +++++++++++++++++++ nncf/openvino/graph/model_utils.py | 13 +++ nncf/openvino/graph/node_utils.py | 7 +- .../fast_bias_correction/algorithm.py | 8 +- .../fast_bias_correction/openvino_backend.py | 31 ++++++- .../fast_bias_correction/torch_backend.py | 12 +++ 6 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 nncf/openvino/graph/model_builder.py diff --git a/nncf/openvino/graph/model_builder.py b/nncf/openvino/graph/model_builder.py new file mode 100644 index 00000000000..f47ac96050a --- /dev/null +++ b/nncf/openvino/graph/model_builder.py @@ -0,0 +1,90 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +import openvino.runtime as ov +from openvino.runtime import opset13 as opset + +import nncf +from nncf.common.graph import NNCFNode +from nncf.openvino.graph.metatypes import openvino_metatypes as om +from nncf.openvino.graph.model_utils import update_tensor_name +from nncf.openvino.graph.node_utils import get_parameter_node_name +from nncf.openvino.graph.node_utils import get_result_node_name + + +def build_for_fast_bc( + model: ov.Model, + node: NNCFNode, + act_port_id: int, + weight_port_id: int, + out_port_id: int = 0, + node_mapping=Dict[str, ov.Node], +) -> ov.Model: + """ + Builds submodel for the FastBiasCorrection algorithm. + The submodel consists of the biased layer (but without bias), weight quantized and weights: + Constant + | + Parameter FakeQuantize + \ / + Convolution + | + Result + + :param model: ov.Model instance as the reference. + :param node: NNCFNode with the layer-related information. + :param act_port_id: Activation port ID. + :param weight_port_id: Weight port ID. + :param out_port_id: Output port ID. + :return: ov.Model subgraph. 
+ """ + # Create nodes mapping + node_name = node.node_name + original_node = node_mapping[node_name] + activation_port = original_node.input_value(act_port_id) + weight_port = original_node.input_value(weight_port_id) + original_weight_fq = weight_port.get_node() + weight_fq_in, weight_fq_in_low, weight_fq_in_high, weight_fq_out_low, weight_fq_out_high = [ + p.get_node() for p in original_weight_fq.input_values() + ] + # Build subgraph + parameter_name = get_parameter_node_name(node_name, act_port_id) + parameter = opset.parameter( + shape=activation_port.partial_shape, + dtype=activation_port.get_element_type(), + name=parameter_name, + ) + weight_fq_params = original_weight_fq.get_attributes() + weight_fq_params.update( + { + "data": weight_fq_in, + "input_low": weight_fq_in_low, + "input_high": weight_fq_in_high, + "output_low": weight_fq_out_low, + "output_high": weight_fq_out_high, + "name": original_weight_fq.get_friendly_name(), + } + ) + weights_fq = opset.fake_quantize(**weight_fq_params) + main_node_params = original_node.get_attributes() + if node.metatype == om.OVConvolutionMetatype: + main_node_params.update({"data": parameter, "filters": weights_fq, "name": original_node.get_friendly_name()}) + main_node = opset.convolution(**main_node_params) + elif node.metatype == om.OVMatMulMetatype: + main_node_params.update({"data_a": parameter, "data_b": weights_fq, "name": original_node.get_friendly_name()}) + main_node = opset.matmul(**main_node_params) + else: + raise nncf.ModuleNotFoundError(f"Not found node type: {node.metatype.name}!") + result_name = get_result_node_name(node_name, port_id=out_port_id) + result = opset.result(main_node, name=result_name) + update_tensor_name([result.get_output_tensor(0)], result_name) + return ov.Model([result], [parameter]) diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index b89c3cba890..7c0fb6f3695 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -12,6 +12,7 @@ from typing import List import openvino.runtime as ov +from openvino._pyopenvino import DescriptorTensor from nncf.common.factory import ModelTransformerFactory from nncf.common.graph.graph import NNCFGraph @@ -106,3 +107,15 @@ def copy_rt_info(model_source: ov.Model, model_dest: ov.Model, path: List[str]) if model_source.has_rt_info(path): source_rt_info = model_source.get_rt_info(path) model_dest.set_rt_info(source_rt_info, path) + + +def update_tensor_name(tensors: List[DescriptorTensor], name: str) -> None: + """ + Updates tensors names in-place. + :param model: List of the tensors. + :param name: New name for tensor. + """ + for tensor in tensors: + current_names = tensor.get_names() + current_names.add(name) + tensor.set_names(current_names) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 7496187adb1..e3dddd9eca5 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -121,7 +121,9 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: return const_node.data -def get_bias_value(node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> np.ndarray: +def get_bias_value( + node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping=Dict[str, ov.Node] +) -> np.ndarray: """ Returns the bias tensor for the biased node. @@ -130,9 +132,8 @@ def get_bias_value(node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Mo :param model: The model that contains this operation. 
:return: The bias value that is applied to the output tensor of the node's operation. """ - ops_dict = {op.get_friendly_name(): op for op in model.get_ops()} bias_constant = get_node_with_bias_value(get_add_bias_node(node_with_bias, nncf_graph), nncf_graph) - ov_bias_constant = ops_dict[bias_constant.node_name] + ov_bias_constant = node_mapping[bias_constant.node_name] return get_const_value(ov_bias_constant) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 3d104cad3c9..5b791c790de 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -135,6 +135,7 @@ def apply( dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) + self._backend_entity.node_mapping = model model_transformer = ModelTransformerFactory.create(model) @@ -162,12 +163,7 @@ def apply( output_fp = self._get_fp_outputs(statistic_points, out_node_name) - # In case of the matrix multiplication layers, this is crucial to know the correct input port. - input_id = (in_node_name, input_port_id) - # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. - output_id = (out_node_name, 0) - - extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) if extracted_model is None: nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") continue diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 1f92559eeb8..5c304d865a8 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -20,6 +20,7 @@ from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_BIAS_REDUCED +from nncf.openvino.graph.model_builder import build_for_fast_bc from nncf.openvino.graph.node_utils import get_activation_channel_axis from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import is_node_with_bias @@ -33,6 +34,19 @@ class OVFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): + + def __init__(self): + super().__init__() + self._node_mapping = None + + @property + def node_mapping(self): + return self._node_mapping + + @node_mapping.setter + def node_mapping(self, model): + self._node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> OVTargetPoint: return OVTargetPoint(target_type, target_node_name, port_id) @@ -73,9 +87,8 @@ def create_input_data( input_data = {input_name: blob} return input_data - @staticmethod - def get_bias_value(node: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> Tensor: - return Tensor(get_bias_value(node, nncf_graph, model)) + def get_bias_value(self, node: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> Tensor: + return Tensor(get_bias_value(node, nncf_graph, model, node_mapping=self.node_mapping)) @staticmethod def get_activation_port_ids_for_bias_node(node: NNCFNode) -> Tuple[int, int]: @@ -113,3 +126,15 @@ def 
get_node_names_for_input_output_statistics(node: NNCFNode, nncf_graph: NNCFG @staticmethod def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple[int]) -> int: return get_activation_channel_axis(node, port_id, input_shape) + + def build_submodel(self, model: ov.Model, node: NNCFNode, input_port_id: int, output_port_id: int) -> ov.Model: + const_port_ids = node.layer_attributes.get_const_port_ids() + assert len(const_port_ids) == 1 + return build_for_fast_bc( + model, + node, + act_port_id=input_port_id, + weight_port_id=const_port_ids[0], + out_port_id=output_port_id, + node_mapping=self.node_mapping, + ) diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 7eda61ce64a..9bf1e640a82 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -39,6 +39,18 @@ class PTFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): TargetType.POST_LAYER_OPERATION: TargetType.OPERATOR_POST_HOOK, } + def __init__(self): + super().__init__() + self._node_mapping = None + + @property + def node_mapping(self): + return self._node_mapping + + @node_mapping.setter + def node_mapping(self, model): + self._node_mapping = {} + @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> PTTargetPoint: if NNCFGraphNodeType.INPUT_NODE in target_node_name or target_type == TargetType.POST_LAYER_OPERATION: From c0420c8da5617f6d4b8b7cd23f4605b5847ef004 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 12 Dec 2024 15:07:47 +0100 Subject: [PATCH 4/8] WA for other backends --- .../algorithms/fast_bias_correction/algorithm.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 5b791c790de..2c3dc75a4f6 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -163,7 +163,15 @@ def apply( output_fp = self._get_fp_outputs(statistic_points, out_node_name) - extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) + if hasattr(self._backend_entity, "build_submodel"): + extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) + else: + # In case of the matrix multiplication layers, this is crucial to know the correct input port. + input_id = (in_node_name, input_port_id) + # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. 
+ output_id = (out_node_name, 0) + + extracted_model = self._extract_submodel(model_transformer, input_id, output_id) if extracted_model is None: nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") continue From 5d0f3bc9d7353931630d02394b0fa237d54cdddd Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:49:44 +0100 Subject: [PATCH 5/8] Dynamic graph building --- nncf/openvino/graph/model_builder.py | 272 +++++++++++++----- nncf/openvino/graph/node_utils.py | 2 +- .../fast_bias_correction/algorithm.py | 33 +-- .../fast_bias_correction/backend.py | 19 ++ .../fast_bias_correction/openvino_backend.py | 33 +-- 5 files changed, 239 insertions(+), 120 deletions(-) diff --git a/nncf/openvino/graph/model_builder.py b/nncf/openvino/graph/model_builder.py index f47ac96050a..cf314650718 100644 --- a/nncf/openvino/graph/model_builder.py +++ b/nncf/openvino/graph/model_builder.py @@ -8,83 +8,215 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from collections import deque +from typing import Dict, List, Tuple import openvino.runtime as ov from openvino.runtime import opset13 as opset +from openvino.runtime.utils.node_factory import NodeFactory -import nncf -from nncf.common.graph import NNCFNode -from nncf.openvino.graph.metatypes import openvino_metatypes as om -from nncf.openvino.graph.model_utils import update_tensor_name +from nncf.openvino.graph.model_transformer import OVModelTransformer from nncf.openvino.graph.node_utils import get_parameter_node_name from nncf.openvino.graph.node_utils import get_result_node_name -def build_for_fast_bc( - model: ov.Model, - node: NNCFNode, - act_port_id: int, - weight_port_id: int, - out_port_id: int = 0, - node_mapping=Dict[str, ov.Node], -) -> ov.Model: +class ModelBuilder: """ - Builds submodel for the FastBiasCorrection algorithm. - The submodel consists of the biased layer (but without bias), weight quantized and weights: - Constant - | - Parameter FakeQuantize - \ / - Convolution - | - Result - - :param model: ov.Model instance as the reference. - :param node: NNCFNode with the layer-related information. - :param act_port_id: Activation port ID. - :param weight_port_id: Weight port ID. - :param out_port_id: Output port ID. - :return: ov.Model subgraph. + The purpose of the ModelBuilder is to build a new OpenVINO model from input and output points. + This Builder was created to reduce the number of model cloning that is required for ModelTransformer to work. 
""" - # Create nodes mapping - node_name = node.node_name - original_node = node_mapping[node_name] - activation_port = original_node.input_value(act_port_id) - weight_port = original_node.input_value(weight_port_id) - original_weight_fq = weight_port.get_node() - weight_fq_in, weight_fq_in_low, weight_fq_in_high, weight_fq_out_low, weight_fq_out_high = [ - p.get_node() for p in original_weight_fq.input_values() - ] - # Build subgraph - parameter_name = get_parameter_node_name(node_name, act_port_id) - parameter = opset.parameter( - shape=activation_port.partial_shape, - dtype=activation_port.get_element_type(), - name=parameter_name, - ) - weight_fq_params = original_weight_fq.get_attributes() - weight_fq_params.update( - { - "data": weight_fq_in, - "input_low": weight_fq_in_low, - "input_high": weight_fq_in_high, - "output_low": weight_fq_out_low, - "output_high": weight_fq_out_high, - "name": original_weight_fq.get_friendly_name(), - } - ) - weights_fq = opset.fake_quantize(**weight_fq_params) - main_node_params = original_node.get_attributes() - if node.metatype == om.OVConvolutionMetatype: - main_node_params.update({"data": parameter, "filters": weights_fq, "name": original_node.get_friendly_name()}) - main_node = opset.convolution(**main_node_params) - elif node.metatype == om.OVMatMulMetatype: - main_node_params.update({"data_a": parameter, "data_b": weights_fq, "name": original_node.get_friendly_name()}) - main_node = opset.matmul(**main_node_params) - else: - raise nncf.ModuleNotFoundError(f"Not found node type: {node.metatype.name}!") - result_name = get_result_node_name(node_name, port_id=out_port_id) - result = opset.result(main_node, name=result_name) - update_tensor_name([result.get_output_tensor(0)], result_name) - return ov.Model([result], [parameter]) + + def __init__(self): + self._node_factory = NodeFactory() + + @staticmethod + def _create_parameter(node_name: str, node_input: ov.Input) -> ov.Node: + """ + A method that contains steps to create a Parameter for a new model using a specific template. + """ + port_id = node_input.get_index() + parameter_name = get_parameter_node_name(node_name, port_id) + return opset.parameter( + shape=node_input.get_partial_shape(), + dtype=node_input.get_element_type(), + name=parameter_name, + ) + + @staticmethod + def _create_result(node_name: str, node_output: ov.Input) -> ov.Node: + """ + A method that contains steps to create a Result for a new model using a specific template. + """ + port_id = node_output.get_index() + result_name = get_result_node_name(node_name, port_id=port_id) + result = opset.result(node_output, name=result_name) + result.get_output_tensor(0).set_names({result_name}) + return result + + def _collect_graph_nodes( + self, + input_ids: List[Tuple[str, int]], + output_ids: List[Tuple[str, int]], + node_mapping: Dict[str, ov.Node], + ) -> List[ov.Node]: + """ + A method for aggregating layers to be further cloned. + Aggregation is designed in such a way that layers are listed from right to left, + as they pass from bottom to top. This is done in order to find all constants in the model and + to start graph creation from them (as well as Parameter layers), because + OpenVINO graph is created from top-down and cannot be created otherwise. 
+ + Legend: w - weigths, c - convert, il/lh - input low/high, ol/oh - output low/high + (w) + | + (c) (il) (ih) (ol) (oh) + \ | | / / + (fake quantize) (parameter) + \ / + (convolution) + | + (result) + Based on the above graph, the return value would look like this: + [convolution, parameter, fake quantize, oh, ol, ih, il, c, w] + + :param input_ids: List of the ids specified in algorithm. + :param output_ids: List of the ids specified in algorithm. + :param node_mapping: Original nodes mapping. + :return: List of the ov.Nodes to clone. + """ + # Creating a list as a deque for FIFO layer acquisition and retrieval + lookup_nodes = deque(node_mapping[n] for n, _ in output_ids) + graph_nodes = [] + + while lookup_nodes: + lookup_node = lookup_nodes.popleft() + lookup_name = lookup_node.get_friendly_name() + node_inputs = lookup_node.inputs() + graph_nodes.append(lookup_node) + # Reversing to lookup nodes from right to left + for node_input in reversed(node_inputs): + port_id = node_input.get_index() + if (lookup_name, port_id) in input_ids: + # We create Parameters here to avoid double creation in the future since it is not an original node, + # but we need to have it as input for next node. + parameter = self._create_parameter(lookup_name, node_input) + lookup_nodes.append(parameter) + continue + parent_node = node_input.get_source_output().get_node() + lookup_nodes.append(parent_node) + + return graph_nodes + + def build( + self, + input_ids: List[Tuple[str, int]], + output_ids: List[Tuple[str, int]], + node_mapping: Dict[str, ov.Node], + ) -> ov.Model: + """ + The basic method of the algorithm. This method uses an aggregated list of layers to be recreated. + Let us take a graph of this kind as an example: + + Legend: w - weigths, c - convert, il/lh - input low/high, ol/oh - output low/high + (w) + | + (c) (il) (ih) (ol) (oh) + \ | | / / + (fake quantize) (parameter) + \ / + (convolution) + | + (result) + + The externally collected list of layers will look like this: + [convolution, parameter, fake quantize, oh, ol, ih, il, c, w] + + Next, this list will be circled from right to left. At the same time, the list of already created layers + will be filled from left to right, which will be used in the traversal step also, from left to right, + in order to keep the order of the original layer inputs. + For example: + + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il, c, w] + clone_nodes = [] + + *creating w - weight node.* + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il, c] + clone_nodes = [w] + + *creating c - convert node. + Based on the .inputs() output, we'll use the already created w-weight node to fill in the convert input. + As the result, weight node would be removed from the clone_nodes list and convert node would be placed here.* + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il] + clone_nodes = [c] + + *creating il/lh - input low/high, ol/oh - output low/high nodes. + Since these nodes are constants and do not require any nodes as inputs, cloned nodes will not be used.* + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il] + clone_nodes = [c, il, ih, ol, oh] + + *creating fake quantize node. + This node requires to have input values in a specific order. + All previous nodes will be connected/used for fake quantize, from left to right.* + graph_nodes = [convolution, parameter] + clone_nodes = [f] + + *creating parameter node. 
+ In this step, the list of parameters will also be filled out with the new node.* + graph_nodes = [convolution] + clone_nodes = [f, parameter] + + *creating convolution node. + This node also requires to have inputs in a specific order. + All previous nodes will be connected/used for convolution, from left to right. Also, + the outputs verification step will show here that one of the convolution outputs is in the output_ids list. + This means that the Result node would be created and placed into the results list.* + graph_nodes = [] + clone_nodes = [convolution] + + The last step is to create a subgraph model based on the parameters & results lists. + + :param input_ids: List of the ids specified in algorithm. + :param output_ids: List of the ids specified in algorithm. + :param node_mapping: Original nodes mapping. + :return: Builded ov.Model based on parameters. + """ + + parameters, results = [], [] + clone_nodes = deque() + + # Collecting nodes that declares the graph. + graph_nodes = self._collect_graph_nodes(input_ids, output_ids, node_mapping) + + while graph_nodes: + graph_node = graph_nodes.pop() + node_type = graph_node.get_type_name() + node_name = graph_node.get_friendly_name() + + # To create the new OpenVINO nodes, we need to provide all possible layer attributes. + attrs = graph_node.get_attributes() + attrs["name"] = node_name + + if node_type == "Constant": + # Constants creation is apart due to specific behavior. + clone_node = OVModelTransformer._create_constant( + graph_node.get_data(), dtype=graph_node.get_element_type(), name=attrs["name"] + ) + elif node_type == "Parameter": + # We've created Parameter nodes on the previous step. + clone_node = graph_node + parameters.append(clone_node) + else: + # We have to have args as the inputs since all of them are nodes and are required to be as input. + args = [clone_nodes.popleft() for _ in graph_node.inputs()] + + clone_node = self._node_factory.create(node_type, args, attrs) + + for node_output in clone_node.outputs(): + port_id = node_output.get_index() + if (node_name, port_id) in output_ids: + result = self._create_result(node_name, node_output) + results.append(result) + + clone_nodes.append(clone_node) + + return ov.Model(results, parameters) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index e3dddd9eca5..52746f06f2e 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -122,7 +122,7 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: def get_bias_value( - node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping=Dict[str, ov.Node] + node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping: Dict[str, ov.Node] ) -> np.ndarray: """ Returns the bias tensor for the biased node. 
diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 2c3dc75a4f6..571c0384d0b 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -17,7 +17,6 @@ from nncf.common.factory import EngineFactory from nncf.common.factory import ModelTransformerFactory from nncf.common.graph.graph import NNCFGraph -from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout @@ -111,7 +110,7 @@ def _set_backend_entity(self, model: TModel) -> None: OVFastBiasCorrectionAlgoBackend, ) - self._backend_entity = OVFastBiasCorrectionAlgoBackend() + self._backend_entity = OVFastBiasCorrectionAlgoBackend(model) elif model_backend == BackendType.TORCH: from nncf.quantization.algorithms.fast_bias_correction.torch_backend import PTFastBiasCorrectionAlgoBackend @@ -163,15 +162,12 @@ def apply( output_fp = self._get_fp_outputs(statistic_points, out_node_name) - if hasattr(self._backend_entity, "build_submodel"): - extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) - else: - # In case of the matrix multiplication layers, this is crucial to know the correct input port. - input_id = (in_node_name, input_port_id) - # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. - output_id = (out_node_name, 0) + # In case of the matrix multiplication layers, this is crucial to know the correct input port. + input_id = (in_node_name, input_port_id) + # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. + output_id = (out_node_name, 0) - extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + extracted_model = self._backend_entity.extract_submodel(model_transformer, input_id, output_id) if extracted_model is None: nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") continue @@ -291,23 +287,6 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return output_fp - def _extract_submodel( - self, model_transformer: ModelTransformer, input_id: Tuple[str, int], output_id: Tuple[str, int] - ) -> TModel: - """ - Extracts sub-model using backend-specific ModelTransformer. - - :param model_transformer: Backend-specific ModelTransformer. - :param input_id: Input ID. - :param output_id: Output ID. - :return: Backend-specific sub-model. - """ - model_extraction_command = self._backend_entity.model_extraction_command([input_id], [output_id]) - me_transformation_layout = TransformationLayout() - me_transformation_layout.register(model_extraction_command) - extracted_model = model_transformer.transform(me_transformation_layout) - return extracted_model - def _add_statistic_point(self, container: StatisticPointsContainer, point: TargetPoint, axis: int) -> None: """ Adds specific statistic point. 
diff --git a/nncf/quantization/algorithms/fast_bias_correction/backend.py b/nncf/quantization/algorithms/fast_bias_correction/backend.py index 110e05161cd..7c76b3857fe 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/backend.py @@ -15,9 +15,11 @@ from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode +from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand +from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase from nncf.tensor import Tensor @@ -194,3 +196,20 @@ def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple :param input_shape: Shape of the input. :return: Channel axis number. """ + + def extract_submodel( + self, model_transformer: ModelTransformer, input_id: Tuple[str, int], output_id: Tuple[str, int] + ) -> TModel: + """ + Extracts sub-model using backend-specific ModelTransformer. + + :param model_transformer: Backend-specific ModelTransformer. + :param input_id: Input ID. + :param output_id: Output ID. + :return: Backend-specific sub-model. + """ + model_extraction_command = self.model_extraction_command([input_id], [output_id]) + me_transformation_layout = TransformationLayout() + me_transformation_layout.register(model_extraction_command) + extracted_model = model_transformer.transform(me_transformation_layout) + return extracted_model diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 5c304d865a8..b79e165228e 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -20,7 +20,7 @@ from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_BIAS_REDUCED -from nncf.openvino.graph.model_builder import build_for_fast_bc +from nncf.openvino.graph.model_builder import ModelBuilder from nncf.openvino.graph.node_utils import get_activation_channel_axis from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import is_node_with_bias @@ -35,17 +35,10 @@ class OVFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): - def __init__(self): - super().__init__() - self._node_mapping = None - - @property - def node_mapping(self): - return self._node_mapping - - @node_mapping.setter - def node_mapping(self, model): + def __init__(self, model): + # Node mapping caching to reduce time for calculations self._node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + self._model_builder = ModelBuilder() @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> OVTargetPoint: @@ -88,7 +81,7 @@ def create_input_data( return input_data def get_bias_value(self, node: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> Tensor: - return Tensor(get_bias_value(node, nncf_graph, model, node_mapping=self.node_mapping)) + return Tensor(get_bias_value(node, nncf_graph, model, 
node_mapping=self._node_mapping)) @staticmethod def get_activation_port_ids_for_bias_node(node: NNCFNode) -> Tuple[int, int]: @@ -127,14 +120,10 @@ def get_node_names_for_input_output_statistics(node: NNCFNode, nncf_graph: NNCFG def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple[int]) -> int: return get_activation_channel_axis(node, port_id, input_shape) - def build_submodel(self, model: ov.Model, node: NNCFNode, input_port_id: int, output_port_id: int) -> ov.Model: - const_port_ids = node.layer_attributes.get_const_port_ids() - assert len(const_port_ids) == 1 - return build_for_fast_bc( - model, - node, - act_port_id=input_port_id, - weight_port_id=const_port_ids[0], - out_port_id=output_port_id, - node_mapping=self.node_mapping, + def extract_submodel(self, model_transformer, input_id, output_id): + + return self._model_builder.build( + input_ids=[input_id], + output_ids=[output_id], + node_mapping=self._node_mapping, ) From 84db55fb7f147d3da340e1daa4c7c7114b9d07e2 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:51:54 +0100 Subject: [PATCH 6/8] Remove unused code --- .../algorithms/fast_bias_correction/torch_backend.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 9bf1e640a82..7eda61ce64a 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -39,18 +39,6 @@ class PTFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): TargetType.POST_LAYER_OPERATION: TargetType.OPERATOR_POST_HOOK, } - def __init__(self): - super().__init__() - self._node_mapping = None - - @property - def node_mapping(self): - return self._node_mapping - - @node_mapping.setter - def node_mapping(self, model): - self._node_mapping = {} - @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> PTTargetPoint: if NNCFGraphNodeType.INPUT_NODE in target_node_name or target_type == TargetType.POST_LAYER_OPERATION: From e7e7ea50bd86d4b85fbd89080cd973b33f0810a5 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:53:30 +0100 Subject: [PATCH 7/8] Remove unused code --- nncf/openvino/graph/model_utils.py | 13 ------------- .../algorithms/fast_bias_correction/algorithm.py | 1 - 2 files changed, 14 deletions(-) diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index 7c0fb6f3695..b89c3cba890 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -12,7 +12,6 @@ from typing import List import openvino.runtime as ov -from openvino._pyopenvino import DescriptorTensor from nncf.common.factory import ModelTransformerFactory from nncf.common.graph.graph import NNCFGraph @@ -107,15 +106,3 @@ def copy_rt_info(model_source: ov.Model, model_dest: ov.Model, path: List[str]) if model_source.has_rt_info(path): source_rt_info = model_source.get_rt_info(path) model_dest.set_rt_info(source_rt_info, path) - - -def update_tensor_name(tensors: List[DescriptorTensor], name: str) -> None: - """ - Updates tensors names in-place. - :param model: List of the tensors. - :param name: New name for tensor. 
- """ - for tensor in tensors: - current_names = tensor.get_names() - current_names.add(name) - tensor.set_names(current_names) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 571c0384d0b..35f057f8a66 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -134,7 +134,6 @@ def apply( dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) - self._backend_entity.node_mapping = model model_transformer = ModelTransformerFactory.create(model) From ac28262d078773d94d974362cfd15260d34dbee1 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:56:15 +0100 Subject: [PATCH 8/8] Add WA --- nncf/openvino/graph/node_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 52746f06f2e..e73fdb14026 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -122,7 +122,7 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: def get_bias_value( - node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping: Dict[str, ov.Node] + node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping: Dict[str, ov.Node] = None ) -> np.ndarray: """ Returns the bias tensor for the biased node. @@ -130,8 +130,11 @@ def get_bias_value( :param node_with_bias: The node that corresponds to the operation with bias. :param nncf_graph: NNCFGraph instance. :param model: The model that contains this operation. + :param node_mapping: Original nodes mapping cache. :return: The bias value that is applied to the output tensor of the node's operation. """ + if node_mapping is None: + node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} bias_constant = get_node_with_bias_value(get_add_bias_node(node_with_bias, nncf_graph), nncf_graph) ov_bias_constant = node_mapping[bias_constant.node_name] return get_const_value(ov_bias_constant)
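
Illustrative usage sketch (not part of the patches above): a minimal example of how the ModelBuilder-based extraction introduced by this series is expected to be driven, assuming an ov.Model whose graph contains a quantized convolution. The node name "Conv_1" and the port ids are hypothetical placeholders; ModelBuilder, build(), the cached friendly-name-to-node mapping, and the (name, port_id) input/output pairs mirror the code added in nncf/openvino/graph/model_builder.py and the OpenVINO FastBiasCorrection backend.

    import openvino.runtime as ov

    from nncf.openvino.graph.model_builder import ModelBuilder


    def extract_fbc_subgraph(model: ov.Model) -> ov.Model:
        # Cache the friendly-name -> node mapping once, as OVFastBiasCorrectionAlgoBackend
        # now does in its __init__, so repeated extractions avoid re-walking the model.
        node_mapping = {op.get_friendly_name(): op for op in model.get_ops()}

        builder = ModelBuilder()
        # FastBiasCorrection passes (in_node_name, activation_port_id) as the input id and
        # (out_node_name, 0) as the output id; the hypothetical "Conv_1" stands in for both.
        return builder.build(
            input_ids=[("Conv_1", 0)],
            output_ids=[("Conv_1", 0)],
            node_mapping=node_mapping,
        )

The returned subgraph has a Parameter in place of the cut activation input and a Result at the requested output, so it can be fed directly to EngineFactory for the bias-shift inference without cloning the full model through ModelTransformer.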