Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Use fp32 inference precision #3018

Merged
merged 6 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions nncf/common/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,13 @@ def create(model: TModel, inplace: bool = False) -> ModelTransformer:

class EngineFactory:
@staticmethod
def create(model: TModel) -> Engine:
def create(model: TModel, use_fp32_precision: bool = False) -> Engine:
"""
Factory method to create backend-specific Engine instance based on the input model.

:param model: backend-specific model instance.
:param use_fp32_precision: A flag that determines whether to force the engine to use FP32
precision during inference.
:return: backend-specific Engine instance.
"""
model_backend = get_backend(model)
Expand All @@ -102,7 +104,7 @@ def create(model: TModel) -> Engine:
if model_backend == BackendType.OPENVINO:
from nncf.openvino.engine import OVNativeEngine

return OVNativeEngine(model)
return OVNativeEngine(model, use_fp32_precision)
if model_backend in (BackendType.TORCH, BackendType.TORCH_FX):
from nncf.torch.engine import PTEngine

Expand Down
2 changes: 1 addition & 1 deletion nncf/common/tensor_statistics/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph)
transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics)
model_with_outputs: TModel = model_transformer.transform(transformation_layout)
engine = factory.EngineFactory.create(model_with_outputs)
engine = factory.EngineFactory.create(model_with_outputs, use_fp32_precision=True)
iterations_number = self._get_iterations_number()
processed_samples = 0
for input_data in track( # type: ignore
Expand Down
9 changes: 7 additions & 2 deletions nncf/openvino/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import numpy as np
import openvino.runtime as ov
from openvino import Type
from openvino.properties.hint import inference_precision

from nncf.common.engine import Engine
from nncf.openvino.graph.model_utils import model_has_state
Expand Down Expand Up @@ -62,10 +64,13 @@ class OVNativeEngine(Engine):
to infer the model.
"""

def __init__(self, model: ov.Model, use_fp32_precision: bool = False):
    """
    Compiles the given model for CPU and wraps it in an engine for inference.

    :param model: OpenVINO model to compile.
    :param use_fp32_precision: A flag that determines whether to force the engine to use FP32
        precision during inference. When True, the `inference_precision` hint is pinned to
        `f32` so results are not affected by device-default lower-precision execution
        (e.g. bf16 on some CPUs).
    """
    # `config=None` lets compile_model fall back to device defaults.
    config = None
    if use_fp32_precision:
        config = {inference_precision: Type.f32}
    ie = ov.Core()
    # NOTE(review): presumably detects stateful (e.g. KV-cache) models so the
    # engine can reset state between inferences — confirm against model_utils.
    stateful = model_has_state(model)
    compiled_model = ie.compile_model(model, device_name="CPU", config=config)
    self.engine = OVCompiledModelEngine(compiled_model, stateful)

def infer(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import numpy as np
import openvino.runtime as ov
from openvino import Type
from openvino.properties.hint import inference_precision

from nncf.common.graph import NNCFGraph
from nncf.common.graph import NNCFNode
Expand Down Expand Up @@ -42,7 +44,7 @@ class OVPreparedModel(PreparedModel):

def __init__(self, model: ov.Model):
    """
    Compiles the given OpenVINO model on CPU with FP32 inference precision.

    :param model: OpenVINO model to prepare for inference.
    """
    # Pin inference precision to FP32 so statistics/outputs are not skewed by
    # device-default lower-precision execution (e.g. bf16).
    compile_config = {inference_precision: Type.f32}
    self._stateful = model_has_state(model)
    self._compiled_model = ov.compile_model(model, device_name="CPU", config=compile_config)
    self._engine = None

@property
Expand Down
4 changes: 2 additions & 2 deletions nncf/quantization/algorithms/bias_correction/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def _compute_bias_shift(
"""
output_fp = self._get_fp_outputs(statistic_points, node.node_name)
output_tensor_name = self._backend_entity.get_output_name(model, node.node_name, OUTPUT_PORT_OF_NODE)
engine = EngineFactory.create(model)
engine = EngineFactory.create(model, use_fp32_precision=True)
channel_axis = node.metatype.output_channel_axis
q_outputs = []
for feed_dict in feed_dicts:
Expand Down Expand Up @@ -469,7 +469,7 @@ def _collect_new_stats(self, model: TModel, feed_dicts: List, subgraph_data: Dic
:param feed_dicts: List of dictionaries with the input data for the subgraph.
:param subgraph_data: A dictionary with the needed list of the statistic nodes that will be updated.
"""
engine = EngineFactory.create(model)
engine = EngineFactory.create(model, use_fp32_precision=True)
for feed_dict in feed_dicts:
new_q_output = engine.infer(feed_dict)
for output_node_name, output_id in subgraph_data["subgraph_output_ids"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def _get_bias_shift(
:param output_name: Name of the output tensor for the data collection.
:return: Calculated bias shift.
"""
engine = EngineFactory.create(model)
engine = EngineFactory.create(model, use_fp32_precision=True)
raw_output = engine.infer(input_blob)
q_outputs = self._backend_entity.process_model_output(raw_output, output_name)
q_outputs = mean_per_channel(q_outputs, output_channel_axis)
Expand Down
Loading