# Use fp32 inference precision (#3018)
### Changes

- Use fp32 inference precision when compiling OpenVINO models on CPU

### Reason for changes

Ref: 140438

### Related tickets

Ref: 140438
andrey-churkin authored Oct 25, 2024
1 parent 1d75eac commit 6afb13d
Showing 3 changed files with 23 additions and 6 deletions.
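
All three files make the same fix: pass OpenVINO's `inference_precision` hint when compiling for CPU, so reference computations run in fp32 even on hardware where the runtime would otherwise select a faster, lower precision (for example bf16). Stripped of the NNCF specifics, the pattern looks like this (a minimal sketch; the model path is hypothetical):

```python
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision

core = ov.Core()
model = core.read_model("model.xml")  # hypothetical model path

# Pin execution to fp32 instead of the device's default precision selection.
compiled = core.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
```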
nncf/openvino/engine.py (12 additions & 2 deletions)
```diff
@@ -13,6 +13,8 @@
 
 import numpy as np
 import openvino.runtime as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 
 from nncf.common.engine import Engine
 from nncf.openvino.graph.model_utils import model_has_state
@@ -62,10 +64,18 @@ class OVNativeEngine(Engine):
     to infer the model.
     """
 
-    def __init__(self, model: ov.Model):
+    def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
+        """
+        :param model: Model.
+        :param use_fp32_precision: A flag that determines whether to force the engine to use FP32
+            precision during inference.
+        """
+        config = None
+        if use_fp32_precision:
+            config = {inference_precision: Type.f32}
         ie = ov.Core()
         stateful = model_has_state(model)
-        compiled_model = ie.compile_model(model, device_name="CPU")
+        compiled_model = ie.compile_model(model, device_name="CPU", config=config)
         self.engine = OVCompiledModelEngine(compiled_model, stateful)
 
     def infer(
```
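
With this change, `OVNativeEngine` compiles with the fp32 hint by default and offers an opt-out; passing `config=None` to `compile_model` behaves like omitting the config, so `use_fp32_precision=False` restores the previous behavior. A hypothetical usage sketch (the model path is illustrative):

```python
import openvino.runtime as ov
from nncf.openvino.engine import OVNativeEngine

model = ov.Core().read_model("model.xml")  # hypothetical model path

engine = OVNativeEngine(model)  # compiled with the fp32 inference-precision hint
engine_default = OVNativeEngine(model, use_fp32_precision=False)  # device default precision
```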
Second changed file (defines `OVPreparedModel`):

```diff
@@ -13,6 +13,8 @@
 
 import numpy as np
 import openvino.runtime as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
@@ -40,9 +42,12 @@ class OVPreparedModel(PreparedModel):
     Implementation of the `PreparedModel` for OpenVINO backend.
     """
 
-    def __init__(self, model: ov.Model):
+    def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
         self._stateful = model_has_state(model)
-        self._compiled_model = ov.compile_model(model, device_name="CPU")
+        config = None
+        if use_fp32_precision:
+            config = {inference_precision: Type.f32}
+        self._compiled_model = ov.compile_model(model, device_name="CPU", config=config)
         self._engine = None
 
     @property
```
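
`OVPreparedModel` gets the same treatment via the module-level `ov.compile_model` convenience function. To confirm the hint actually took effect, it can be read back from the compiled model; a small self-contained sketch (the one-op model is just a stand-in, and the property-name string is the standard OpenVINO key, not something from this diff):

```python
import numpy as np
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision
from openvino.runtime import opset13 as opset

# A trivial one-op model, enough to exercise compilation.
param = opset.parameter([1], dtype=np.float32)
model = ov.Model([opset.relu(param)], [param])

compiled = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})

# The effective hint is queryable on the compiled model.
assert compiled.get_property("INFERENCE_PRECISION_HINT") == Type.f32
```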
Third changed file (the weight-compression compress/decompress helpers):

```diff
@@ -11,6 +11,8 @@
 from typing import Dict, Iterable, List, Optional, Tuple
 
 import openvino as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 from openvino.runtime import opset13 as opset
 
 import nncf
@@ -346,7 +348,7 @@ def get_compress_decompress_pipeline(config: WeightCompressionConfig, w_shape, s
 
     model = ov.Model([result], parameters)
 
-    compiled_model = ov.compile_model(model, device_name="CPU")
+    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
 
     return lambda parameters: compiled_model(parameters)[0]
 
@@ -379,7 +381,7 @@ def get_compress_pipeline(config: WeightCompressionConfig, w_shape, s_shape, z_p
 
     model = ov.Model([result], parameters)
 
-    compiled_model = ov.compile_model(model, device_name="CPU")
+    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
 
     return lambda parameters: compiled_model(parameters)[0]
```
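
Both helpers return a thin closure over the compiled model. The sketch below reproduces that pattern on a toy graph (a single multiply standing in for the real compress/decompress subgraph) to show what the returned lambda does:

```python
import numpy as np
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision
from openvino.runtime import opset13 as opset

# Toy stand-in for the compress/decompress graph built by the helpers.
param = opset.parameter([2, 2], dtype=np.float32)
result = opset.multiply(param, opset.constant(2.0, dtype=np.float32))
model = ov.Model([result], [param])

# Compiled with the same fp32 hint as in the diff above.
compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
pipeline = lambda parameters: compiled_model(parameters)[0]

print(pipeline([np.ones((2, 2), dtype=np.float32)]))  # -> [[2. 2.] [2. 2.]]
```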
