From abf07e9a8fb109cdce9605334634cc073fd8b14f Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 11 Dec 2024 14:35:52 +0100 Subject: [PATCH 1/8] Change calculations --- nncf/openvino/graph/model_transformer.py | 82 +++++++++---------- .../fast_bias_correction/algorithm.py | 78 +++++++++++++----- .../fast_bias_correction/openvino_backend.py | 9 ++ 3 files changed, 108 insertions(+), 61 deletions(-) diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index a331b82314e..41d71705de9 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -558,50 +558,50 @@ def _apply_model_extraction_transformation( :return: Extracted sub-model. """ outputs_type = ov.Type.f32 - transformation = transformations[-1] name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) params, results = [], [] - for input_name, input_port_id in transformation.input_ids: - input_node = name_to_node_mapping[input_name] - if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: - params.append(input_node) - continue - - input_port = input_node.input(input_port_id) - input_type = input_port.get_element_type() - input_node_output = input_port.get_source_output() - parameter_name = get_parameter_node_name(input_name, input_port_id) - - new_param = opset.parameter( - shape=input_node_output.partial_shape, - dtype=outputs_type, - name=parameter_name, - ) - new_input = new_param.output(0) - - if input_type != outputs_type: - new_input = opset.convert(new_param, destination_type=input_type).output(0) - - input_port.replace_source_output(new_input) - new_param_tensors = [o.get_tensor() for o in new_param.outputs()] - OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) - params.append(new_param) - - for output_name, output_port_id in transformation.output_ids: - output_node = name_to_node_mapping[output_name] - - result_name = get_result_node_name(output_name, output_port_id) - output_port = output_node.output(output_port_id) - if output_port.get_element_type() != outputs_type: - output_port = opset.convert(output_node, destination_type=outputs_type).output(0) - new_result = opset.result(output_port, name=result_name) - result_tensor_names = [result_name] + list(output_port.get_names()) - OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) - results.append(new_result) - - if not results: - results = model.get_results() + for transformation in transformations: + for input_name, input_port_id in transformation.input_ids: + input_node = name_to_node_mapping[input_name] + if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: + params.append(input_node) + continue + + input_port = input_node.input(input_port_id) + input_type = input_port.get_element_type() + input_node_output = input_port.get_source_output() + parameter_name = get_parameter_node_name(input_name, input_port_id) + + new_param = opset.parameter( + shape=input_node_output.partial_shape, + dtype=outputs_type, + name=parameter_name, + ) + new_input = new_param.output(0) + + if input_type != outputs_type: + new_input = opset.convert(new_param, destination_type=input_type).output(0) + + input_port.replace_source_output(new_input) + new_param_tensors = [o.get_tensor() for o in new_param.outputs()] + OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) + params.append(new_param) + + for output_name, output_port_id in 
transformation.output_ids: + output_node = name_to_node_mapping[output_name] + + result_name = get_result_node_name(output_name, output_port_id) + output_port = output_node.output(output_port_id) + if output_port.get_element_type() != outputs_type: + output_port = opset.convert(output_node, destination_type=outputs_type).output(0) + new_result = opset.result(output_port, name=result_name) + result_tensor_names = [result_name] + list(output_port.get_names()) + OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) + results.append(new_result) + + if not results: + results.extend(model.get_results()) extracted_model = ov.Model(results, params) copy_rt_info(model, extracted_model, path=["nncf"]) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 3d104cad3c9..d1ea0204fe9 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -148,6 +148,38 @@ def apply( # for which we should update bias and new bias values. node_and_new_bias_value = [] + input_feed = {} + + me_transformation_layout = TransformationLayout() + for node, bias_value in track(node_and_bias_value, description="Applying Preparing step"): + node_name = node.node_name + + if not self._backend_entity.is_quantized_weights(node, graph): + nncf_logger.debug(f"Skipping node {node_name} because weights were not quantized") + continue + + in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics(node, graph) + input_port_id, _ = self._backend_entity.get_activation_port_ids_for_bias_node(node) + + input_id = (in_node_name, input_port_id) + output_id = (out_node_name, 0) + + model_extraction_command = self._backend_entity.model_extraction_command([input_id], [output_id]) + me_transformation_layout.register(model_extraction_command) + + input_fp, input_shape = self._get_fp_inputs(statistic_points, in_node_name) + sub_input_name = self._backend_entity.get_parameter_node_name(node_name, input_port_id) + + input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) + input_blob = self._backend_entity.create_input_data( + input_shape, input_fp, sub_input_name, input_channel_axis + ) + input_feed.update(input_blob) + + extracted_model = model_transformer.transform(me_transformation_layout) + engine = EngineFactory.create(extracted_model) + raw_output = engine.infer(input_feed) + for node, bias_value in track(node_and_bias_value, description="Applying Fast Bias correction"): node_name = node.node_name @@ -167,29 +199,35 @@ def apply( # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. 
output_id = (out_node_name, 0) - extracted_model = self._extract_submodel(model_transformer, input_id, output_id) - if extracted_model is None: - nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") - continue + sub_output_name = self._backend_entity.get_result_node_name(node_name, 0) + + # extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + # if extracted_model is None: + # nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") + # continue - sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) + # sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) output_channel_axis = node.metatype.output_channel_axis - input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) - if bias_value.ndim > 1: - # Make index positive - output_channel_axis = range(bias_value.ndim)[output_channel_axis] - input_channel_axis = range(bias_value.ndim)[input_channel_axis] - input_blob = self._backend_entity.create_input_data( - input_shape, input_fp, sub_input_name, input_channel_axis - ) - bias_shift = self._get_bias_shift( - model=extracted_model, - input_blob=input_blob, - output_channel_axis=output_channel_axis, - output_fp=output_fp, - output_name=sub_output_name, - ) + # input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) + # if bias_value.ndim > 1: + # # Make index positive + # output_channel_axis = range(bias_value.ndim)[output_channel_axis] + # input_channel_axis = range(bias_value.ndim)[input_channel_axis] + # input_blob = self._backend_entity.create_input_data( + # input_shape, input_fp, sub_input_name, input_channel_axis + # ) + # bias_shift = self._get_bias_shift( + # model=extracted_model, + # input_blob=input_blob, + # output_channel_axis=output_channel_axis, + # output_fp=output_fp, + # output_name=sub_output_name, + # ) + + q_outputs = self._backend_entity.process_model_output(raw_output, sub_output_name) + q_outputs = mean_per_channel(q_outputs, output_channel_axis) + bias_shift = fns.stack(output_fp) - q_outputs bias_shift = self._reshape_bias_shift(bias_shift, bias_value, output_channel_axis) updated_bias = bias_value + bias_shift diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 1f92559eeb8..c20bde6e905 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -29,6 +29,8 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.collectors import get_mean_statistic_collector from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend +from nncf.openvino.graph.node_utils import get_parameter_node_name +from nncf.openvino.graph.node_utils import get_result_node_name from nncf.tensor import Tensor @@ -59,6 +61,13 @@ def mean_statistic_collector( return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod + def get_parameter_node_name(node_name: str, port_id: int) -> str: + return get_parameter_node_name(node_name, port_id) + + @staticmethod + def get_result_node_name(node_name: str, port_id: int) -> str: + return get_result_node_name(node_name, port_id) + def get_sub_input_output_names(subgraph: 
ov.Model) -> Tuple[str, str]: return subgraph.inputs[0].get_any_name(), subgraph.outputs[0].get_any_name() From 1dbe08c34425c11d248bb6183c1634015bd34d43 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 12 Dec 2024 09:37:26 +0100 Subject: [PATCH 2/8] Revert "Change calculations" This reverts commit abf07e9a8fb109cdce9605334634cc073fd8b14f. --- nncf/openvino/graph/model_transformer.py | 82 +++++++++---------- .../fast_bias_correction/algorithm.py | 78 +++++------------- .../fast_bias_correction/openvino_backend.py | 9 -- 3 files changed, 61 insertions(+), 108 deletions(-) diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index 41d71705de9..a331b82314e 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -558,50 +558,50 @@ def _apply_model_extraction_transformation( :return: Extracted sub-model. """ outputs_type = ov.Type.f32 + transformation = transformations[-1] name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) params, results = [], [] - for transformation in transformations: - for input_name, input_port_id in transformation.input_ids: - input_node = name_to_node_mapping[input_name] - if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: - params.append(input_node) - continue - - input_port = input_node.input(input_port_id) - input_type = input_port.get_element_type() - input_node_output = input_port.get_source_output() - parameter_name = get_parameter_node_name(input_name, input_port_id) - - new_param = opset.parameter( - shape=input_node_output.partial_shape, - dtype=outputs_type, - name=parameter_name, - ) - new_input = new_param.output(0) - - if input_type != outputs_type: - new_input = opset.convert(new_param, destination_type=input_type).output(0) - - input_port.replace_source_output(new_input) - new_param_tensors = [o.get_tensor() for o in new_param.outputs()] - OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) - params.append(new_param) - - for output_name, output_port_id in transformation.output_ids: - output_node = name_to_node_mapping[output_name] - - result_name = get_result_node_name(output_name, output_port_id) - output_port = output_node.output(output_port_id) - if output_port.get_element_type() != outputs_type: - output_port = opset.convert(output_node, destination_type=outputs_type).output(0) - new_result = opset.result(output_port, name=result_name) - result_tensor_names = [result_name] + list(output_port.get_names()) - OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) - results.append(new_result) - - if not results: - results.extend(model.get_results()) + for input_name, input_port_id in transformation.input_ids: + input_node = name_to_node_mapping[input_name] + if input_name in [tensor.node.get_friendly_name() for tensor in model.inputs]: + params.append(input_node) + continue + + input_port = input_node.input(input_port_id) + input_type = input_port.get_element_type() + input_node_output = input_port.get_source_output() + parameter_name = get_parameter_node_name(input_name, input_port_id) + + new_param = opset.parameter( + shape=input_node_output.partial_shape, + dtype=outputs_type, + name=parameter_name, + ) + new_input = new_param.output(0) + + if input_type != outputs_type: + new_input = opset.convert(new_param, destination_type=input_type).output(0) + + input_port.replace_source_output(new_input) + new_param_tensors = [o.get_tensor() for o 
in new_param.outputs()] + OVModelTransformer._update_tensor_names(new_param_tensors, [parameter_name]) + params.append(new_param) + + for output_name, output_port_id in transformation.output_ids: + output_node = name_to_node_mapping[output_name] + + result_name = get_result_node_name(output_name, output_port_id) + output_port = output_node.output(output_port_id) + if output_port.get_element_type() != outputs_type: + output_port = opset.convert(output_node, destination_type=outputs_type).output(0) + new_result = opset.result(output_port, name=result_name) + result_tensor_names = [result_name] + list(output_port.get_names()) + OVModelTransformer._update_tensor_names([new_result.get_output_tensor(0)], result_tensor_names) + results.append(new_result) + + if not results: + results = model.get_results() extracted_model = ov.Model(results, params) copy_rt_info(model, extracted_model, path=["nncf"]) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index d1ea0204fe9..3d104cad3c9 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -148,38 +148,6 @@ def apply( # for which we should update bias and new bias values. node_and_new_bias_value = [] - input_feed = {} - - me_transformation_layout = TransformationLayout() - for node, bias_value in track(node_and_bias_value, description="Applying Preparing step"): - node_name = node.node_name - - if not self._backend_entity.is_quantized_weights(node, graph): - nncf_logger.debug(f"Skipping node {node_name} because weights were not quantized") - continue - - in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics(node, graph) - input_port_id, _ = self._backend_entity.get_activation_port_ids_for_bias_node(node) - - input_id = (in_node_name, input_port_id) - output_id = (out_node_name, 0) - - model_extraction_command = self._backend_entity.model_extraction_command([input_id], [output_id]) - me_transformation_layout.register(model_extraction_command) - - input_fp, input_shape = self._get_fp_inputs(statistic_points, in_node_name) - sub_input_name = self._backend_entity.get_parameter_node_name(node_name, input_port_id) - - input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) - input_blob = self._backend_entity.create_input_data( - input_shape, input_fp, sub_input_name, input_channel_axis - ) - input_feed.update(input_blob) - - extracted_model = model_transformer.transform(me_transformation_layout) - engine = EngineFactory.create(extracted_model) - raw_output = engine.infer(input_feed) - for node, bias_value in track(node_and_bias_value, description="Applying Fast Bias correction"): node_name = node.node_name @@ -199,35 +167,29 @@ def apply( # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. 
output_id = (out_node_name, 0) - sub_output_name = self._backend_entity.get_result_node_name(node_name, 0) - - # extracted_model = self._extract_submodel(model_transformer, input_id, output_id) - # if extracted_model is None: - # nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") - # continue + extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + if extracted_model is None: + nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") + continue - # sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) + sub_input_name, sub_output_name = self._backend_entity.get_sub_input_output_names(extracted_model) output_channel_axis = node.metatype.output_channel_axis - # input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) - # if bias_value.ndim > 1: - # # Make index positive - # output_channel_axis = range(bias_value.ndim)[output_channel_axis] - # input_channel_axis = range(bias_value.ndim)[input_channel_axis] - # input_blob = self._backend_entity.create_input_data( - # input_shape, input_fp, sub_input_name, input_channel_axis - # ) - # bias_shift = self._get_bias_shift( - # model=extracted_model, - # input_blob=input_blob, - # output_channel_axis=output_channel_axis, - # output_fp=output_fp, - # output_name=sub_output_name, - # ) - - q_outputs = self._backend_entity.process_model_output(raw_output, sub_output_name) - q_outputs = mean_per_channel(q_outputs, output_channel_axis) - bias_shift = fns.stack(output_fp) - q_outputs + input_channel_axis = self._backend_entity.get_activation_channel_axis(node, input_port_id, input_shape) + if bias_value.ndim > 1: + # Make index positive + output_channel_axis = range(bias_value.ndim)[output_channel_axis] + input_channel_axis = range(bias_value.ndim)[input_channel_axis] + input_blob = self._backend_entity.create_input_data( + input_shape, input_fp, sub_input_name, input_channel_axis + ) + bias_shift = self._get_bias_shift( + model=extracted_model, + input_blob=input_blob, + output_channel_axis=output_channel_axis, + output_fp=output_fp, + output_name=sub_output_name, + ) bias_shift = self._reshape_bias_shift(bias_shift, bias_value, output_channel_axis) updated_bias = bias_value + bias_shift diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index c20bde6e905..1f92559eeb8 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -29,8 +29,6 @@ from nncf.openvino.graph.transformations.commands import OVTargetPoint from nncf.openvino.statistics.collectors import get_mean_statistic_collector from nncf.quantization.algorithms.fast_bias_correction.backend import FastBiasCorrectionAlgoBackend -from nncf.openvino.graph.node_utils import get_parameter_node_name -from nncf.openvino.graph.node_utils import get_result_node_name from nncf.tensor import Tensor @@ -61,13 +59,6 @@ def mean_statistic_collector( return get_mean_statistic_collector(num_samples, channel_axis, window_size, inplace) @staticmethod - def get_parameter_node_name(node_name: str, port_id: int) -> str: - return get_parameter_node_name(node_name, port_id) - - @staticmethod - def get_result_node_name(node_name: str, port_id: int) -> str: - return get_result_node_name(node_name, port_id) - def get_sub_input_output_names(subgraph: 
ov.Model) -> Tuple[str, str]: return subgraph.inputs[0].get_any_name(), subgraph.outputs[0].get_any_name() From 3a9a355bb648703ffafa03de89d0c625bc3aa7d6 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 12 Dec 2024 14:47:47 +0100 Subject: [PATCH 3/8] Improve mapping --- nncf/openvino/graph/model_builder.py | 90 +++++++++++++++++++ nncf/openvino/graph/model_utils.py | 13 +++ nncf/openvino/graph/node_utils.py | 7 +- .../fast_bias_correction/algorithm.py | 8 +- .../fast_bias_correction/openvino_backend.py | 31 ++++++- .../fast_bias_correction/torch_backend.py | 12 +++ 6 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 nncf/openvino/graph/model_builder.py diff --git a/nncf/openvino/graph/model_builder.py b/nncf/openvino/graph/model_builder.py new file mode 100644 index 00000000000..f47ac96050a --- /dev/null +++ b/nncf/openvino/graph/model_builder.py @@ -0,0 +1,90 @@ +# Copyright (c) 2024 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +import openvino.runtime as ov +from openvino.runtime import opset13 as opset + +import nncf +from nncf.common.graph import NNCFNode +from nncf.openvino.graph.metatypes import openvino_metatypes as om +from nncf.openvino.graph.model_utils import update_tensor_name +from nncf.openvino.graph.node_utils import get_parameter_node_name +from nncf.openvino.graph.node_utils import get_result_node_name + + +def build_for_fast_bc( + model: ov.Model, + node: NNCFNode, + act_port_id: int, + weight_port_id: int, + out_port_id: int = 0, + node_mapping=Dict[str, ov.Node], +) -> ov.Model: + """ + Builds submodel for the FastBiasCorrection algorithm. + The submodel consists of the biased layer (but without bias), weight quantized and weights: + Constant + | + Parameter FakeQuantize + \ / + Convolution + | + Result + + :param model: ov.Model instance as the reference. + :param node: NNCFNode with the layer-related information. + :param act_port_id: Activation port ID. + :param weight_port_id: Weight port ID. + :param out_port_id: Output port ID. + :return: ov.Model subgraph. 
+ """ + # Create nodes mapping + node_name = node.node_name + original_node = node_mapping[node_name] + activation_port = original_node.input_value(act_port_id) + weight_port = original_node.input_value(weight_port_id) + original_weight_fq = weight_port.get_node() + weight_fq_in, weight_fq_in_low, weight_fq_in_high, weight_fq_out_low, weight_fq_out_high = [ + p.get_node() for p in original_weight_fq.input_values() + ] + # Build subgraph + parameter_name = get_parameter_node_name(node_name, act_port_id) + parameter = opset.parameter( + shape=activation_port.partial_shape, + dtype=activation_port.get_element_type(), + name=parameter_name, + ) + weight_fq_params = original_weight_fq.get_attributes() + weight_fq_params.update( + { + "data": weight_fq_in, + "input_low": weight_fq_in_low, + "input_high": weight_fq_in_high, + "output_low": weight_fq_out_low, + "output_high": weight_fq_out_high, + "name": original_weight_fq.get_friendly_name(), + } + ) + weights_fq = opset.fake_quantize(**weight_fq_params) + main_node_params = original_node.get_attributes() + if node.metatype == om.OVConvolutionMetatype: + main_node_params.update({"data": parameter, "filters": weights_fq, "name": original_node.get_friendly_name()}) + main_node = opset.convolution(**main_node_params) + elif node.metatype == om.OVMatMulMetatype: + main_node_params.update({"data_a": parameter, "data_b": weights_fq, "name": original_node.get_friendly_name()}) + main_node = opset.matmul(**main_node_params) + else: + raise nncf.ModuleNotFoundError(f"Not found node type: {node.metatype.name}!") + result_name = get_result_node_name(node_name, port_id=out_port_id) + result = opset.result(main_node, name=result_name) + update_tensor_name([result.get_output_tensor(0)], result_name) + return ov.Model([result], [parameter]) diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index b89c3cba890..7c0fb6f3695 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -12,6 +12,7 @@ from typing import List import openvino.runtime as ov +from openvino._pyopenvino import DescriptorTensor from nncf.common.factory import ModelTransformerFactory from nncf.common.graph.graph import NNCFGraph @@ -106,3 +107,15 @@ def copy_rt_info(model_source: ov.Model, model_dest: ov.Model, path: List[str]) if model_source.has_rt_info(path): source_rt_info = model_source.get_rt_info(path) model_dest.set_rt_info(source_rt_info, path) + + +def update_tensor_name(tensors: List[DescriptorTensor], name: str) -> None: + """ + Updates tensors names in-place. + :param model: List of the tensors. + :param name: New name for tensor. + """ + for tensor in tensors: + current_names = tensor.get_names() + current_names.add(name) + tensor.set_names(current_names) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 7496187adb1..e3dddd9eca5 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -121,7 +121,9 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: return const_node.data -def get_bias_value(node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> np.ndarray: +def get_bias_value( + node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping=Dict[str, ov.Node] +) -> np.ndarray: """ Returns the bias tensor for the biased node. @@ -130,9 +132,8 @@ def get_bias_value(node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Mo :param model: The model that contains this operation. 
:return: The bias value that is applied to the output tensor of the node's operation. """ - ops_dict = {op.get_friendly_name(): op for op in model.get_ops()} bias_constant = get_node_with_bias_value(get_add_bias_node(node_with_bias, nncf_graph), nncf_graph) - ov_bias_constant = ops_dict[bias_constant.node_name] + ov_bias_constant = node_mapping[bias_constant.node_name] return get_const_value(ov_bias_constant) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 3d104cad3c9..5b791c790de 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -135,6 +135,7 @@ def apply( dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) + self._backend_entity.node_mapping = model model_transformer = ModelTransformerFactory.create(model) @@ -162,12 +163,7 @@ def apply( output_fp = self._get_fp_outputs(statistic_points, out_node_name) - # In case of the matrix multiplication layers, this is crucial to know the correct input port. - input_id = (in_node_name, input_port_id) - # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. - output_id = (out_node_name, 0) - - extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) if extracted_model is None: nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") continue diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 1f92559eeb8..5c304d865a8 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -20,6 +20,7 @@ from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_BIAS_REDUCED +from nncf.openvino.graph.model_builder import build_for_fast_bc from nncf.openvino.graph.node_utils import get_activation_channel_axis from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import is_node_with_bias @@ -33,6 +34,19 @@ class OVFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): + + def __init__(self): + super().__init__() + self._node_mapping = None + + @property + def node_mapping(self): + return self._node_mapping + + @node_mapping.setter + def node_mapping(self, model): + self._node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> OVTargetPoint: return OVTargetPoint(target_type, target_node_name, port_id) @@ -73,9 +87,8 @@ def create_input_data( input_data = {input_name: blob} return input_data - @staticmethod - def get_bias_value(node: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> Tensor: - return Tensor(get_bias_value(node, nncf_graph, model)) + def get_bias_value(self, node: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> Tensor: + return Tensor(get_bias_value(node, nncf_graph, model, node_mapping=self.node_mapping)) @staticmethod def get_activation_port_ids_for_bias_node(node: NNCFNode) -> Tuple[int, int]: @@ -113,3 +126,15 @@ def 
get_node_names_for_input_output_statistics(node: NNCFNode, nncf_graph: NNCFG @staticmethod def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple[int]) -> int: return get_activation_channel_axis(node, port_id, input_shape) + + def build_submodel(self, model: ov.Model, node: NNCFNode, input_port_id: int, output_port_id: int) -> ov.Model: + const_port_ids = node.layer_attributes.get_const_port_ids() + assert len(const_port_ids) == 1 + return build_for_fast_bc( + model, + node, + act_port_id=input_port_id, + weight_port_id=const_port_ids[0], + out_port_id=output_port_id, + node_mapping=self.node_mapping, + ) diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 7eda61ce64a..9bf1e640a82 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -39,6 +39,18 @@ class PTFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): TargetType.POST_LAYER_OPERATION: TargetType.OPERATOR_POST_HOOK, } + def __init__(self): + super().__init__() + self._node_mapping = None + + @property + def node_mapping(self): + return self._node_mapping + + @node_mapping.setter + def node_mapping(self, model): + self._node_mapping = {} + @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> PTTargetPoint: if NNCFGraphNodeType.INPUT_NODE in target_node_name or target_type == TargetType.POST_LAYER_OPERATION: From c0420c8da5617f6d4b8b7cd23f4605b5847ef004 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 12 Dec 2024 15:07:47 +0100 Subject: [PATCH 4/8] WA for other backends --- .../algorithms/fast_bias_correction/algorithm.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 5b791c790de..2c3dc75a4f6 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -163,7 +163,15 @@ def apply( output_fp = self._get_fp_outputs(statistic_points, out_node_name) - extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) + if hasattr(self._backend_entity, "build_submodel"): + extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) + else: + # In case of the matrix multiplication layers, this is crucial to know the correct input port. + input_id = (in_node_name, input_port_id) + # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. 
+ output_id = (out_node_name, 0) + + extracted_model = self._extract_submodel(model_transformer, input_id, output_id) if extracted_model is None: nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") continue From 5d0f3bc9d7353931630d02394b0fa237d54cdddd Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:49:44 +0100 Subject: [PATCH 5/8] Dynamic graph building --- nncf/openvino/graph/model_builder.py | 272 +++++++++++++----- nncf/openvino/graph/node_utils.py | 2 +- .../fast_bias_correction/algorithm.py | 33 +-- .../fast_bias_correction/backend.py | 19 ++ .../fast_bias_correction/openvino_backend.py | 33 +-- 5 files changed, 239 insertions(+), 120 deletions(-) diff --git a/nncf/openvino/graph/model_builder.py b/nncf/openvino/graph/model_builder.py index f47ac96050a..cf314650718 100644 --- a/nncf/openvino/graph/model_builder.py +++ b/nncf/openvino/graph/model_builder.py @@ -8,83 +8,215 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from collections import deque +from typing import Dict, List, Tuple import openvino.runtime as ov from openvino.runtime import opset13 as opset +from openvino.runtime.utils.node_factory import NodeFactory -import nncf -from nncf.common.graph import NNCFNode -from nncf.openvino.graph.metatypes import openvino_metatypes as om -from nncf.openvino.graph.model_utils import update_tensor_name +from nncf.openvino.graph.model_transformer import OVModelTransformer from nncf.openvino.graph.node_utils import get_parameter_node_name from nncf.openvino.graph.node_utils import get_result_node_name -def build_for_fast_bc( - model: ov.Model, - node: NNCFNode, - act_port_id: int, - weight_port_id: int, - out_port_id: int = 0, - node_mapping=Dict[str, ov.Node], -) -> ov.Model: +class ModelBuilder: """ - Builds submodel for the FastBiasCorrection algorithm. - The submodel consists of the biased layer (but without bias), weight quantized and weights: - Constant - | - Parameter FakeQuantize - \ / - Convolution - | - Result - - :param model: ov.Model instance as the reference. - :param node: NNCFNode with the layer-related information. - :param act_port_id: Activation port ID. - :param weight_port_id: Weight port ID. - :param out_port_id: Output port ID. - :return: ov.Model subgraph. + The purpose of the ModelBuilder is to build a new OpenVINO model from input and output points. + This Builder was created to reduce the number of model cloning that is required for ModelTransformer to work. 
""" - # Create nodes mapping - node_name = node.node_name - original_node = node_mapping[node_name] - activation_port = original_node.input_value(act_port_id) - weight_port = original_node.input_value(weight_port_id) - original_weight_fq = weight_port.get_node() - weight_fq_in, weight_fq_in_low, weight_fq_in_high, weight_fq_out_low, weight_fq_out_high = [ - p.get_node() for p in original_weight_fq.input_values() - ] - # Build subgraph - parameter_name = get_parameter_node_name(node_name, act_port_id) - parameter = opset.parameter( - shape=activation_port.partial_shape, - dtype=activation_port.get_element_type(), - name=parameter_name, - ) - weight_fq_params = original_weight_fq.get_attributes() - weight_fq_params.update( - { - "data": weight_fq_in, - "input_low": weight_fq_in_low, - "input_high": weight_fq_in_high, - "output_low": weight_fq_out_low, - "output_high": weight_fq_out_high, - "name": original_weight_fq.get_friendly_name(), - } - ) - weights_fq = opset.fake_quantize(**weight_fq_params) - main_node_params = original_node.get_attributes() - if node.metatype == om.OVConvolutionMetatype: - main_node_params.update({"data": parameter, "filters": weights_fq, "name": original_node.get_friendly_name()}) - main_node = opset.convolution(**main_node_params) - elif node.metatype == om.OVMatMulMetatype: - main_node_params.update({"data_a": parameter, "data_b": weights_fq, "name": original_node.get_friendly_name()}) - main_node = opset.matmul(**main_node_params) - else: - raise nncf.ModuleNotFoundError(f"Not found node type: {node.metatype.name}!") - result_name = get_result_node_name(node_name, port_id=out_port_id) - result = opset.result(main_node, name=result_name) - update_tensor_name([result.get_output_tensor(0)], result_name) - return ov.Model([result], [parameter]) + + def __init__(self): + self._node_factory = NodeFactory() + + @staticmethod + def _create_parameter(node_name: str, node_input: ov.Input) -> ov.Node: + """ + A method that contains steps to create a Parameter for a new model using a specific template. + """ + port_id = node_input.get_index() + parameter_name = get_parameter_node_name(node_name, port_id) + return opset.parameter( + shape=node_input.get_partial_shape(), + dtype=node_input.get_element_type(), + name=parameter_name, + ) + + @staticmethod + def _create_result(node_name: str, node_output: ov.Input) -> ov.Node: + """ + A method that contains steps to create a Result for a new model using a specific template. + """ + port_id = node_output.get_index() + result_name = get_result_node_name(node_name, port_id=port_id) + result = opset.result(node_output, name=result_name) + result.get_output_tensor(0).set_names({result_name}) + return result + + def _collect_graph_nodes( + self, + input_ids: List[Tuple[str, int]], + output_ids: List[Tuple[str, int]], + node_mapping: Dict[str, ov.Node], + ) -> List[ov.Node]: + """ + A method for aggregating layers to be further cloned. + Aggregation is designed in such a way that layers are listed from right to left, + as they pass from bottom to top. This is done in order to find all constants in the model and + to start graph creation from them (as well as Parameter layers), because + OpenVINO graph is created from top-down and cannot be created otherwise. 
+ + Legend: w - weigths, c - convert, il/lh - input low/high, ol/oh - output low/high + (w) + | + (c) (il) (ih) (ol) (oh) + \ | | / / + (fake quantize) (parameter) + \ / + (convolution) + | + (result) + Based on the above graph, the return value would look like this: + [convolution, parameter, fake quantize, oh, ol, ih, il, c, w] + + :param input_ids: List of the ids specified in algorithm. + :param output_ids: List of the ids specified in algorithm. + :param node_mapping: Original nodes mapping. + :return: List of the ov.Nodes to clone. + """ + # Creating a list as a deque for FIFO layer acquisition and retrieval + lookup_nodes = deque(node_mapping[n] for n, _ in output_ids) + graph_nodes = [] + + while lookup_nodes: + lookup_node = lookup_nodes.popleft() + lookup_name = lookup_node.get_friendly_name() + node_inputs = lookup_node.inputs() + graph_nodes.append(lookup_node) + # Reversing to lookup nodes from right to left + for node_input in reversed(node_inputs): + port_id = node_input.get_index() + if (lookup_name, port_id) in input_ids: + # We create Parameters here to avoid double creation in the future since it is not an original node, + # but we need to have it as input for next node. + parameter = self._create_parameter(lookup_name, node_input) + lookup_nodes.append(parameter) + continue + parent_node = node_input.get_source_output().get_node() + lookup_nodes.append(parent_node) + + return graph_nodes + + def build( + self, + input_ids: List[Tuple[str, int]], + output_ids: List[Tuple[str, int]], + node_mapping: Dict[str, ov.Node], + ) -> ov.Model: + """ + The basic method of the algorithm. This method uses an aggregated list of layers to be recreated. + Let us take a graph of this kind as an example: + + Legend: w - weigths, c - convert, il/lh - input low/high, ol/oh - output low/high + (w) + | + (c) (il) (ih) (ol) (oh) + \ | | / / + (fake quantize) (parameter) + \ / + (convolution) + | + (result) + + The externally collected list of layers will look like this: + [convolution, parameter, fake quantize, oh, ol, ih, il, c, w] + + Next, this list will be circled from right to left. At the same time, the list of already created layers + will be filled from left to right, which will be used in the traversal step also, from left to right, + in order to keep the order of the original layer inputs. + For example: + + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il, c, w] + clone_nodes = [] + + *creating w - weight node.* + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il, c] + clone_nodes = [w] + + *creating c - convert node. + Based on the .inputs() output, we'll use the already created w-weight node to fill in the convert input. + As the result, weight node would be removed from the clone_nodes list and convert node would be placed here.* + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il] + clone_nodes = [c] + + *creating il/lh - input low/high, ol/oh - output low/high nodes. + Since these nodes are constants and do not require any nodes as inputs, cloned nodes will not be used.* + graph_nodes = [convolution, parameter, fake quantize, oh, ol, ih, il] + clone_nodes = [c, il, ih, ol, oh] + + *creating fake quantize node. + This node requires to have input values in a specific order. + All previous nodes will be connected/used for fake quantize, from left to right.* + graph_nodes = [convolution, parameter] + clone_nodes = [f] + + *creating parameter node. 
+ In this step, the list of parameters will also be filled out with the new node.* + graph_nodes = [convolution] + clone_nodes = [f, parameter] + + *creating convolution node. + This node also requires to have inputs in a specific order. + All previous nodes will be connected/used for convolution, from left to right. Also, + the outputs verification step will show here that one of the convolution outputs is in the output_ids list. + This means that the Result node would be created and placed into the results list.* + graph_nodes = [] + clone_nodes = [convolution] + + The last step is to create a subgraph model based on the parameters & results lists. + + :param input_ids: List of the ids specified in algorithm. + :param output_ids: List of the ids specified in algorithm. + :param node_mapping: Original nodes mapping. + :return: Builded ov.Model based on parameters. + """ + + parameters, results = [], [] + clone_nodes = deque() + + # Collecting nodes that declares the graph. + graph_nodes = self._collect_graph_nodes(input_ids, output_ids, node_mapping) + + while graph_nodes: + graph_node = graph_nodes.pop() + node_type = graph_node.get_type_name() + node_name = graph_node.get_friendly_name() + + # To create the new OpenVINO nodes, we need to provide all possible layer attributes. + attrs = graph_node.get_attributes() + attrs["name"] = node_name + + if node_type == "Constant": + # Constants creation is apart due to specific behavior. + clone_node = OVModelTransformer._create_constant( + graph_node.get_data(), dtype=graph_node.get_element_type(), name=attrs["name"] + ) + elif node_type == "Parameter": + # We've created Parameter nodes on the previous step. + clone_node = graph_node + parameters.append(clone_node) + else: + # We have to have args as the inputs since all of them are nodes and are required to be as input. + args = [clone_nodes.popleft() for _ in graph_node.inputs()] + + clone_node = self._node_factory.create(node_type, args, attrs) + + for node_output in clone_node.outputs(): + port_id = node_output.get_index() + if (node_name, port_id) in output_ids: + result = self._create_result(node_name, node_output) + results.append(result) + + clone_nodes.append(clone_node) + + return ov.Model(results, parameters) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index e3dddd9eca5..52746f06f2e 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -122,7 +122,7 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: def get_bias_value( - node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping=Dict[str, ov.Node] + node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping: Dict[str, ov.Node] ) -> np.ndarray: """ Returns the bias tensor for the biased node. 
diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 2c3dc75a4f6..571c0384d0b 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -17,7 +17,6 @@ from nncf.common.factory import EngineFactory from nncf.common.factory import ModelTransformerFactory from nncf.common.graph.graph import NNCFGraph -from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout @@ -111,7 +110,7 @@ def _set_backend_entity(self, model: TModel) -> None: OVFastBiasCorrectionAlgoBackend, ) - self._backend_entity = OVFastBiasCorrectionAlgoBackend() + self._backend_entity = OVFastBiasCorrectionAlgoBackend(model) elif model_backend == BackendType.TORCH: from nncf.quantization.algorithms.fast_bias_correction.torch_backend import PTFastBiasCorrectionAlgoBackend @@ -163,15 +162,12 @@ def apply( output_fp = self._get_fp_outputs(statistic_points, out_node_name) - if hasattr(self._backend_entity, "build_submodel"): - extracted_model = self._backend_entity.build_submodel(model, node, input_port_id, 0) - else: - # In case of the matrix multiplication layers, this is crucial to know the correct input port. - input_id = (in_node_name, input_port_id) - # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. - output_id = (out_node_name, 0) + # In case of the matrix multiplication layers, this is crucial to know the correct input port. + input_id = (in_node_name, input_port_id) + # Outputs of the subgraphs for the FastBiasCorrection are the same across the backends. + output_id = (out_node_name, 0) - extracted_model = self._extract_submodel(model_transformer, input_id, output_id) + extracted_model = self._backend_entity.extract_submodel(model_transformer, input_id, output_id) if extracted_model is None: nncf_logger.debug(f"Skipping node {node_name} because cant extract submodel") continue @@ -291,23 +287,6 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return output_fp - def _extract_submodel( - self, model_transformer: ModelTransformer, input_id: Tuple[str, int], output_id: Tuple[str, int] - ) -> TModel: - """ - Extracts sub-model using backend-specific ModelTransformer. - - :param model_transformer: Backend-specific ModelTransformer. - :param input_id: Input ID. - :param output_id: Output ID. - :return: Backend-specific sub-model. - """ - model_extraction_command = self._backend_entity.model_extraction_command([input_id], [output_id]) - me_transformation_layout = TransformationLayout() - me_transformation_layout.register(model_extraction_command) - extracted_model = model_transformer.transform(me_transformation_layout) - return extracted_model - def _add_statistic_point(self, container: StatisticPointsContainer, point: TargetPoint, axis: int) -> None: """ Adds specific statistic point. 
diff --git a/nncf/quantization/algorithms/fast_bias_correction/backend.py b/nncf/quantization/algorithms/fast_bias_correction/backend.py index 110e05161cd..7c76b3857fe 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/backend.py @@ -15,9 +15,11 @@ from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode +from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand +from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase from nncf.tensor import Tensor @@ -194,3 +196,20 @@ def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple :param input_shape: Shape of the input. :return: Channel axis number. """ + + def extract_submodel( + self, model_transformer: ModelTransformer, input_id: Tuple[str, int], output_id: Tuple[str, int] + ) -> TModel: + """ + Extracts sub-model using backend-specific ModelTransformer. + + :param model_transformer: Backend-specific ModelTransformer. + :param input_id: Input ID. + :param output_id: Output ID. + :return: Backend-specific sub-model. + """ + model_extraction_command = self.model_extraction_command([input_id], [output_id]) + me_transformation_layout = TransformationLayout() + me_transformation_layout.register(model_extraction_command) + extracted_model = model_transformer.transform(me_transformation_layout) + return extracted_model diff --git a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py index 5c304d865a8..b79e165228e 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/openvino_backend.py @@ -20,7 +20,7 @@ from nncf.experimental.common.tensor_statistics.collectors import TensorCollector from nncf.openvino.graph.metatypes.groups import FAKE_QUANTIZE_OPERATIONS from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_BIAS_REDUCED -from nncf.openvino.graph.model_builder import build_for_fast_bc +from nncf.openvino.graph.model_builder import ModelBuilder from nncf.openvino.graph.node_utils import get_activation_channel_axis from nncf.openvino.graph.node_utils import get_bias_value from nncf.openvino.graph.node_utils import is_node_with_bias @@ -35,17 +35,10 @@ class OVFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): - def __init__(self): - super().__init__() - self._node_mapping = None - - @property - def node_mapping(self): - return self._node_mapping - - @node_mapping.setter - def node_mapping(self, model): + def __init__(self, model): + # Node mapping caching to reduce time for calculations self._node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + self._model_builder = ModelBuilder() @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> OVTargetPoint: @@ -88,7 +81,7 @@ def create_input_data( return input_data def get_bias_value(self, node: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model) -> Tensor: - return Tensor(get_bias_value(node, nncf_graph, model, node_mapping=self.node_mapping)) + return Tensor(get_bias_value(node, nncf_graph, model, 
node_mapping=self._node_mapping)) @staticmethod def get_activation_port_ids_for_bias_node(node: NNCFNode) -> Tuple[int, int]: @@ -127,14 +120,10 @@ def get_node_names_for_input_output_statistics(node: NNCFNode, nncf_graph: NNCFG def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: Tuple[int]) -> int: return get_activation_channel_axis(node, port_id, input_shape) - def build_submodel(self, model: ov.Model, node: NNCFNode, input_port_id: int, output_port_id: int) -> ov.Model: - const_port_ids = node.layer_attributes.get_const_port_ids() - assert len(const_port_ids) == 1 - return build_for_fast_bc( - model, - node, - act_port_id=input_port_id, - weight_port_id=const_port_ids[0], - out_port_id=output_port_id, - node_mapping=self.node_mapping, + def extract_submodel(self, model_transformer, input_id, output_id): + + return self._model_builder.build( + input_ids=[input_id], + output_ids=[output_id], + node_mapping=self._node_mapping, ) From 84db55fb7f147d3da340e1daa4c7c7114b9d07e2 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:51:54 +0100 Subject: [PATCH 6/8] Remove unused code --- .../algorithms/fast_bias_correction/torch_backend.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py index 9bf1e640a82..7eda61ce64a 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py +++ b/nncf/quantization/algorithms/fast_bias_correction/torch_backend.py @@ -39,18 +39,6 @@ class PTFastBiasCorrectionAlgoBackend(FastBiasCorrectionAlgoBackend): TargetType.POST_LAYER_OPERATION: TargetType.OPERATOR_POST_HOOK, } - def __init__(self): - super().__init__() - self._node_mapping = None - - @property - def node_mapping(self): - return self._node_mapping - - @node_mapping.setter - def node_mapping(self, model): - self._node_mapping = {} - @staticmethod def target_point(target_type: TargetType, target_node_name: str, port_id: int) -> PTTargetPoint: if NNCFGraphNodeType.INPUT_NODE in target_node_name or target_type == TargetType.POST_LAYER_OPERATION: From e7e7ea50bd86d4b85fbd89080cd973b33f0810a5 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:53:30 +0100 Subject: [PATCH 7/8] Remove unused code --- nncf/openvino/graph/model_utils.py | 13 ------------- .../algorithms/fast_bias_correction/algorithm.py | 1 - 2 files changed, 14 deletions(-) diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index 7c0fb6f3695..b89c3cba890 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -12,7 +12,6 @@ from typing import List import openvino.runtime as ov -from openvino._pyopenvino import DescriptorTensor from nncf.common.factory import ModelTransformerFactory from nncf.common.graph.graph import NNCFGraph @@ -107,15 +106,3 @@ def copy_rt_info(model_source: ov.Model, model_dest: ov.Model, path: List[str]) if model_source.has_rt_info(path): source_rt_info = model_source.get_rt_info(path) model_dest.set_rt_info(source_rt_info, path) - - -def update_tensor_name(tensors: List[DescriptorTensor], name: str) -> None: - """ - Updates tensors names in-place. - :param model: List of the tensors. - :param name: New name for tensor. 
- """ - for tensor in tensors: - current_names = tensor.get_names() - current_names.add(name) - tensor.set_names(current_names) diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 571c0384d0b..35f057f8a66 100644 --- a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -134,7 +134,6 @@ def apply( dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) - self._backend_entity.node_mapping = model model_transformer = ModelTransformerFactory.create(model) From ac28262d078773d94d974362cfd15260d34dbee1 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Wed, 18 Dec 2024 16:56:15 +0100 Subject: [PATCH 8/8] Add WA --- nncf/openvino/graph/node_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 52746f06f2e..e73fdb14026 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -122,7 +122,7 @@ def get_const_value(const_node: ov.Node) -> np.ndarray: def get_bias_value( - node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping: Dict[str, ov.Node] + node_with_bias: NNCFNode, nncf_graph: NNCFGraph, model: ov.Model, node_mapping: Dict[str, ov.Node] = None ) -> np.ndarray: """ Returns the bias tensor for the biased node. @@ -130,8 +130,11 @@ def get_bias_value( :param node_with_bias: The node that corresponds to the operation with bias. :param nncf_graph: NNCFGraph instance. :param model: The model that contains this operation. + :param node_mapping: Original nodes mapping cache. :return: The bias value that is applied to the output tensor of the node's operation. """ + if node_mapping is None: + node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} bias_constant = get_node_with_bias_value(get_add_bias_node(node_with_bias, nncf_graph), nncf_graph) ov_bias_constant = node_mapping[bias_constant.node_name] return get_const_value(ov_bias_constant)
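
Illustrative usage sketch (not part of the patches above): a minimal example of how the ModelBuilder-based extraction introduced by this series is expected to be driven, assuming an ov.Model whose graph contains a quantized convolution. The node name "Conv_1" and the port ids are hypothetical placeholders; ModelBuilder, build(), the cached friendly-name-to-node mapping, and the (name, port_id) input/output pairs mirror the code added in nncf/openvino/graph/model_builder.py and the OpenVINO FastBiasCorrection backend.

    import openvino.runtime as ov

    from nncf.openvino.graph.model_builder import ModelBuilder


    def extract_fbc_subgraph(model: ov.Model) -> ov.Model:
        # Cache the friendly-name -> node mapping once, as OVFastBiasCorrectionAlgoBackend
        # now does in its __init__, so repeated extractions avoid re-walking the model.
        node_mapping = {op.get_friendly_name(): op for op in model.get_ops()}

        builder = ModelBuilder()
        # FastBiasCorrection passes (in_node_name, activation_port_id) as the input id and
        # (out_node_name, 0) as the output id; the hypothetical "Conv_1" stands in for both.
        return builder.build(
            input_ids=[("Conv_1", 0)],
            output_ids=[("Conv_1", 0)],
            node_mapping=node_mapping,
        )

The returned subgraph has a Parameter in place of the cut activation input and a Result at the requested output, so it can be fed directly to EngineFactory for the bias-shift inference without cloning the full model through ModelTransformer.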