diff --git a/nncf/openvino/engine.py b/nncf/openvino/engine.py
index 04805fac31f..1591cf7c362 100644
--- a/nncf/openvino/engine.py
+++ b/nncf/openvino/engine.py
@@ -13,6 +13,8 @@
 import numpy as np
 import openvino.runtime as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 
 from nncf.common.engine import Engine
 from nncf.openvino.graph.model_utils import model_has_state
@@ -62,10 +64,18 @@ class OVNativeEngine(Engine):
     to infer the model.
     """
 
-    def __init__(self, model: ov.Model):
+    def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
+        """
+        :param model: Model.
+        :param use_fp32_precision: A flag that determines whether to force the engine to use FP32
+            precision during inference.
+        """
+        config = None
+        if use_fp32_precision:
+            config = {inference_precision: Type.f32}
         ie = ov.Core()
         stateful = model_has_state(model)
-        compiled_model = ie.compile_model(model, device_name="CPU")
+        compiled_model = ie.compile_model(model, device_name="CPU", config=config)
         self.engine = OVCompiledModelEngine(compiled_model, stateful)
 
     def infer(
diff --git a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py
index 694156edce9..37961be77e2 100644
--- a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py
+++ b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py
@@ -13,6 +13,8 @@
 import numpy as np
 import openvino.runtime as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
@@ -40,9 +42,12 @@ class OVPreparedModel(PreparedModel):
     Implementation of the `PreparedModel` for OpenVINO backend.
     """
 
-    def __init__(self, model: ov.Model):
+    def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
         self._stateful = model_has_state(model)
-        self._compiled_model = ov.compile_model(model, device_name="CPU")
+        config = None
+        if use_fp32_precision:
+            config = {inference_precision: Type.f32}
+        self._compiled_model = ov.compile_model(model, device_name="CPU", config=config)
         self._engine = None
 
     @property
diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py
index 34b70b05a88..3d17d1a6af4 100644
--- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py
+++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py
@@ -11,6 +11,8 @@
 from typing import Dict, Iterable, List, Optional, Tuple
 
 import openvino as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 from openvino.runtime import opset13 as opset
 
 import nncf
@@ -346,7 +348,7 @@ def get_compress_decompress_pipeline(config: WeightCompressionConfig, w_shape, s
 
     model = ov.Model([result], parameters)
 
-    compiled_model = ov.compile_model(model, device_name="CPU")
+    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
 
     return lambda parameters: compiled_model(parameters)[0]
 
@@ -379,7 +381,7 @@ def get_compress_pipeline(config: WeightCompressionConfig, w_shape, s_shape, z_p
 
     model = ov.Model([result], parameters)
 
-    compiled_model = ov.compile_model(model, device_name="CPU")
+    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
 
     return lambda parameters: compiled_model(parameters)[0]
 
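
For context, a minimal standalone sketch (not part of the patch) of the pattern every hunk above applies: compiling an OpenVINO model with the inference-precision hint pinned to FP32, so the CPU plugin does not execute in a lower precision (e.g. BF16) on hardware that supports it. The toy model below is a hypothetical stand-in; only the compile_model config comes from the diff.

import numpy as np
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision
from openvino.runtime import opset13 as opset

# Hypothetical one-op model used purely for illustration.
param = opset.parameter([1, 3], Type.f32, name="x")
model = ov.Model([opset.relu(param)], [param])

# The pattern from the diff: force FP32 execution via the precision hint.
compiled = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
result = compiled(np.ones((1, 3), dtype=np.float32))[0]

Passing config=None (the use_fp32_precision=False path in the patched constructors) leaves the plugin's default precision selection in place.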