# Use fp32 inference precision (#3018)
### Changes

- Use fp32 inference precision when compiling OpenVINO models on CPU

### Reason for changes

Ref: 140438

### Related tickets

Ref: 140438
andrey-churkin authored Oct 25, 2024
1 parent 1d75eac commit 6afb13d
Showing 3 changed files with 23 additions and 6 deletions.
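
All three files make the same fix: pass OpenVINO's `inference_precision` hint when compiling for CPU, so reference computations run in fp32 even on hardware where the runtime would otherwise select a faster, lower precision (for example bf16). Stripped of the NNCF specifics, the pattern looks like this (a minimal sketch; the model path is hypothetical):

```python
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision

core = ov.Core()
model = core.read_model("model.xml")  # hypothetical model path

# Pin execution to fp32 instead of the device's default precision selection.
compiled = core.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
```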
nncf/openvino/engine.py (12 additions & 2 deletions)
```diff
@@ -13,6 +13,8 @@
 
 import numpy as np
 import openvino.runtime as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 
 from nncf.common.engine import Engine
 from nncf.openvino.graph.model_utils import model_has_state
@@ -62,10 +64,18 @@ class OVNativeEngine(Engine):
     to infer the model.
     """
 
-    def __init__(self, model: ov.Model):
+    def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
+        """
+        :param model: Model.
+        :param use_fp32_precision: A flag that determines whether to force the engine to use FP32
+            precision during inference.
+        """
+        config = None
+        if use_fp32_precision:
+            config = {inference_precision: Type.f32}
         ie = ov.Core()
         stateful = model_has_state(model)
-        compiled_model = ie.compile_model(model, device_name="CPU")
+        compiled_model = ie.compile_model(model, device_name="CPU", config=config)
         self.engine = OVCompiledModelEngine(compiled_model, stateful)
 
     def infer(
```
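
With this change, `OVNativeEngine` compiles with the fp32 hint by default and offers an opt-out; passing `config=None` to `compile_model` behaves like omitting the config, so `use_fp32_precision=False` restores the previous behavior. A hypothetical usage sketch (the model path is illustrative):

```python
import openvino.runtime as ov
from nncf.openvino.engine import OVNativeEngine

model = ov.Core().read_model("model.xml")  # hypothetical model path

engine = OVNativeEngine(model)  # compiled with the fp32 inference-precision hint
engine_default = OVNativeEngine(model, use_fp32_precision=False)  # device default precision
```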
Second changed file (defines `OVPreparedModel`):

```diff
@@ -13,6 +13,8 @@
 
 import numpy as np
 import openvino.runtime as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 
 from nncf.common.graph import NNCFGraph
 from nncf.common.graph import NNCFNode
@@ -40,9 +42,12 @@ class OVPreparedModel(PreparedModel):
     Implementation of the `PreparedModel` for OpenVINO backend.
     """
 
-    def __init__(self, model: ov.Model):
+    def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
         self._stateful = model_has_state(model)
-        self._compiled_model = ov.compile_model(model, device_name="CPU")
+        config = None
+        if use_fp32_precision:
+            config = {inference_precision: Type.f32}
+        self._compiled_model = ov.compile_model(model, device_name="CPU", config=config)
         self._engine = None
 
     @property
```
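
`OVPreparedModel` gets the same treatment via the module-level `ov.compile_model` convenience function. To confirm the hint actually took effect, it can be read back from the compiled model; a small self-contained sketch (the one-op model is just a stand-in, and the property-name string is the standard OpenVINO key, not something from this diff):

```python
import numpy as np
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision
from openvino.runtime import opset13 as opset

# A trivial one-op model, enough to exercise compilation.
param = opset.parameter([1], dtype=np.float32)
model = ov.Model([opset.relu(param)], [param])

compiled = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})

# The effective hint is queryable on the compiled model.
assert compiled.get_property("INFERENCE_PRECISION_HINT") == Type.f32
```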
Third changed file (the weight-compression compress/decompress helpers):

```diff
@@ -11,6 +11,8 @@
 from typing import Dict, Iterable, List, Optional, Tuple
 
 import openvino as ov
+from openvino import Type
+from openvino.properties.hint import inference_precision
 from openvino.runtime import opset13 as opset
 
 import nncf
@@ -346,7 +348,7 @@ def get_compress_decompress_pipeline(config: WeightCompressionConfig, w_shape, s
 
     model = ov.Model([result], parameters)
 
-    compiled_model = ov.compile_model(model, device_name="CPU")
+    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
 
     return lambda parameters: compiled_model(parameters)[0]
 
@@ -379,7 +381,7 @@ def get_compress_pipeline(config: WeightCompressionConfig, w_shape, s_shape, z_p
 
     model = ov.Model([result], parameters)
 
-    compiled_model = ov.compile_model(model, device_name="CPU")
+    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
 
     return lambda parameters: compiled_model(parameters)[0]
```
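
Both helpers return a thin closure over the compiled model. The sketch below reproduces that pattern on a toy graph (a single multiply standing in for the real compress/decompress subgraph) to show what the returned lambda does:

```python
import numpy as np
import openvino as ov
from openvino import Type
from openvino.properties.hint import inference_precision
from openvino.runtime import opset13 as opset

# Toy stand-in for the compress/decompress graph built by the helpers.
param = opset.parameter([2, 2], dtype=np.float32)
result = opset.multiply(param, opset.constant(2.0, dtype=np.float32))
model = ov.Model([result], [param])

# Compiled with the same fp32 hint as in the diff above.
compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision: Type.f32})
pipeline = lambda parameters: compiled_model(parameters)[0]

print(pipeline([np.ones((2, 2), dtype=np.float32)]))  # -> [[2. 2.] [2. 2.]]
```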
