Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Use fp32 inference precision #3018

Merged
merged 6 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions nncf/common/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,13 @@ def create(model: TModel, inplace: bool = False) -> ModelTransformer:

class EngineFactory:
@staticmethod
def create(model: TModel) -> Engine:
def create(model: TModel, use_fp32_precision: bool = False) -> Engine:
"""
Factory method to create backend-specific Engine instance based on the input model.

:param model: backend-specific model instance.
:param use_fp32_precision: A flag that determines whether to force the engine to use FP32
precision during inference.
:return: backend-specific Engine instance.
"""
model_backend = get_backend(model)
Expand All @@ -102,7 +104,7 @@ def create(model: TModel) -> Engine:
if model_backend == BackendType.OPENVINO:
from nncf.openvino.engine import OVNativeEngine

return OVNativeEngine(model)
return OVNativeEngine(model, use_fp32_precision)
if model_backend in (BackendType.TORCH, BackendType.TORCH_FX):
from nncf.torch.engine import PTEngine

Expand Down
2 changes: 1 addition & 1 deletion nncf/common/tensor_statistics/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None:
merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph)
transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics)
model_with_outputs: TModel = model_transformer.transform(transformation_layout)
engine = factory.EngineFactory.create(model_with_outputs)
engine = factory.EngineFactory.create(model_with_outputs, use_fp32_precision=True)
iterations_number = self._get_iterations_number()
processed_samples = 0
for input_data in track( # type: ignore
Expand Down
9 changes: 7 additions & 2 deletions nncf/openvino/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import numpy as np
import openvino.runtime as ov
from openvino import Type
from openvino.properties.hint import inference_precision

from nncf.common.engine import Engine
from nncf.openvino.graph.model_utils import model_has_state
Expand Down Expand Up @@ -62,10 +64,13 @@ class OVNativeEngine(Engine):
to infer the model.
"""

def __init__(self, model: ov.Model, use_fp32_precision: bool = False):
    """
    Compiles the given model for CPU and wraps it in an engine for inference.

    :param model: OpenVINO model to compile.
    :param use_fp32_precision: A flag that determines whether to force the engine to use FP32
        precision during inference. When True, the `inference_precision` hint is pinned to
        `f32` so results are not affected by device-default lower-precision execution
        (e.g. bf16 on some CPUs).
    """
    # `config=None` lets compile_model fall back to device defaults.
    config = None
    if use_fp32_precision:
        config = {inference_precision: Type.f32}
    ie = ov.Core()
    # NOTE(review): presumably detects stateful (e.g. KV-cache) models so the
    # engine can reset state between inferences — confirm against model_utils.
    stateful = model_has_state(model)
    compiled_model = ie.compile_model(model, device_name="CPU", config=config)
    self.engine = OVCompiledModelEngine(compiled_model, stateful)

def infer(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import numpy as np
import openvino.runtime as ov
from openvino import Type
from openvino.properties.hint import inference_precision

from nncf.common.graph import NNCFGraph
from nncf.common.graph import NNCFNode
Expand Down Expand Up @@ -42,7 +44,7 @@ class OVPreparedModel(PreparedModel):

def __init__(self, model: ov.Model):
    """
    Compiles the given OpenVINO model on CPU with FP32 inference precision.

    :param model: OpenVINO model to prepare for inference.
    """
    # Pin inference precision to FP32 so statistics/outputs are not skewed by
    # device-default lower-precision execution (e.g. bf16).
    compile_config = {inference_precision: Type.f32}
    self._stateful = model_has_state(model)
    self._compiled_model = ov.compile_model(model, device_name="CPU", config=compile_config)
    self._engine = None

@property
Expand Down
4 changes: 2 additions & 2 deletions nncf/quantization/algorithms/bias_correction/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def _compute_bias_shift(
"""
output_fp = self._get_fp_outputs(statistic_points, node.node_name)
output_tensor_name = self._backend_entity.get_output_name(model, node.node_name, OUTPUT_PORT_OF_NODE)
engine = EngineFactory.create(model)
engine = EngineFactory.create(model, use_fp32_precision=True)
channel_axis = node.metatype.output_channel_axis
q_outputs = []
for feed_dict in feed_dicts:
Expand Down Expand Up @@ -469,7 +469,7 @@ def _collect_new_stats(self, model: TModel, feed_dicts: List, subgraph_data: Dic
:param feed_dicts: List of dictionaries with the input data for the subgraph.
:param subgraph_data: A dictionary with the needed list of the statistic nodes that will be updated.
"""
engine = EngineFactory.create(model)
engine = EngineFactory.create(model, use_fp32_precision=True)
for feed_dict in feed_dicts:
new_q_output = engine.infer(feed_dict)
for output_node_name, output_id in subgraph_data["subgraph_output_ids"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def _get_bias_shift(
:param output_name: Name of the output tensor for the data collection.
:return: Calculated bias shift.
"""
engine = EngineFactory.create(model)
engine = EngineFactory.create(model, use_fp32_precision=True)
raw_output = engine.infer(input_blob)
q_outputs = self._backend_entity.process_model_output(raw_output, output_name)
q_outputs = mean_per_channel(q_outputs, output_channel_axis)
Expand Down
Loading