BF16 fix
nikita-savelyevv committed Sep 12, 2024
1 parent 8e1d7b4 commit 43967ab
Showing 5 changed files with 331 additions and 98 deletions.
11 changes: 2 additions & 9 deletions nncf/openvino/graph/node_utils.py
@@ -115,15 +115,8 @@ def get_const_value(const_node: ov.Node) -> np.ndarray:
     :param const_node: OpenVINO node.
     :return: The constant value.
     """
-    if const_node.get_element_type() == ov.Type.bf16:
-        INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
-        if INPUT_DTYPE == "bf16":
-            ov_tensor = ov.Tensor(const_node.output(0))
-            assert ov_tensor.element_type == ov.Type.bf16
-            # ov_tensor_bytes = ov_tensor.data.tobytes()
-            # assert all(map(lambda b: b == ov_tensor_bytes[0], ov_tensor_bytes))
-            # exit(0)
-            return ov_tensor
+    INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
+    if const_node.get_element_type() == ov.Type.bf16 and INPUT_DTYPE != "bf16":
         # Fixed FP32 data type as the result for BF16 constant
         return const_node.get_data(dtype=np.float32)
     return const_node.data
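Below is a hedged usage sketch of the patched helper (the model path is hypothetical; INPUT_DTYPE is the real switch read in the diff): with the default "fp32" setting, a bf16 constant comes back as a genuine float32 array instead of bf16 bits mislabeled as float16.

import os
import numpy as np
import openvino as ov
from nncf.openvino.graph.node_utils import get_const_value

core = ov.Core()
model = core.read_model("model_bf16.xml")  # hypothetical model with bf16 weights
const_node = next(op for op in model.get_ordered_ops() if op.get_type_name() == "Constant")

os.environ["INPUT_DTYPE"] = "fp32"  # default: do not keep bf16 as-is
value = get_const_value(const_node)
if const_node.get_element_type() == ov.Type.bf16:
    # numpy has no bfloat16 dtype, so the helper upcasts to float32 rather
    # than exposing the raw bf16 buffer through `.data`
    assert value.dtype == np.float32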
@@ -284,6 +284,8 @@ def transform_model(
             layer_zero_points = (
                 None if precomputed_zero_points is None else precomputed_zero_points.get(wc_params.weight_name)
             )
+            import os
+            os.environ["CURRENT_NODE_NAME"] = wc_params.weight_name
             mul, compressed_weight = self._create_compression_subgraph(
                 weight=weight,
                 compression_config=wc_params.compression_config,
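The two added lines are debug plumbing: the per-weight loop publishes the current weight's name through the environment so a helper deep in the call stack (here, calculate_quantized_weight) can recover it without threading a new parameter through every signature. A minimal sketch of the pattern, with illustrative names:

import os

def compress_all(weight_names):
    # Illustrative stand-in for the transform_model loop.
    for name in weight_names:
        os.environ["CURRENT_NODE_NAME"] = name
        quantize_one()

def quantize_one():
    # Illustrative stand-in for calculate_quantized_weight: no extra
    # parameter is needed to know which weight is being processed.
    print("compressing", os.environ.get("CURRENT_NODE_NAME"))

compress_all(["layer.0.weight", "layer.1.weight"])  # hypothetical names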
21 changes: 19 additions & 2 deletions nncf/quantization/algorithms/weight_compression/weight_lowering.py
@@ -346,12 +346,19 @@ def calculate_quantized_weight(
     NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
     END_TO_END_COMPRESSION = bool(int(os.environ.get("END_TO_END_COMPRESSION", "0")))
     COMPARE_WITH_NUMPY = bool(int(os.environ.get("COMPARE_WITH_NUMPY", "0")))
+    INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
     ov_compression = weight.backend in [TensorBackend.numpy, TensorBackend.ov] and is_openvino_available() and not NUMPY_COMPRESSION
     compressed_weights_ov, scale_ov, zero_point_ov = None, None, None
     if ov_compression:
         from nncf.openvino.quantization.compression_primitives import OV_COMPRESSION_PRIMITIVE_CACHE
 
-        input_tensors = (weight.data,)
+        if INPUT_DTYPE == "bf16":
+            import openvino as ov
+            assert weight.data.dtype == np.float16
+            weight_data = ov.Tensor(weight.data, weight.data.shape, ov.Type.bf16)
+        else:
+            weight_data = weight.data
+        input_tensors = (weight_data,)
         if not END_TO_END_COMPRESSION:
             zero_point_shape = None if zero_point is None else zero_point.shape
             compiled_model, compress_weight_primitive = OV_COMPRESSION_PRIMITIVE_CACHE.get_compress_weight_primitive(
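The added branch performs a zero-copy relabeling rather than a numeric conversion: numpy has no bfloat16 dtype, so bf16 payloads travel inside float16 arrays of the same 16-bit width, and ov.Tensor(array, shape, ov.Type.bf16) reattaches the correct element type to that buffer. A standalone hedged sketch (the bit patterns below are illustrative):

import numpy as np
import openvino as ov

# bf16 bit patterns for [1.0, 2.0, -3.0]; stored in a float16 array the
# bytes survive intact, but reading them as float16 gives garbage values.
raw = np.array([0x3F80, 0x4000, 0xC040], dtype=np.uint16).view(np.float16)

# Relabel the same 16-bit buffer as bf16; no numeric conversion happens.
bf16_tensor = ov.Tensor(raw, raw.shape, ov.Type.bf16)
assert bf16_tensor.element_type == ov.Type.bf16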
@@ -379,7 +386,17 @@ def calculate_quantized_weight(
     if weight.dtype != TensorDataType.float32:
         weight = weight.astype(TensorDataType.float32)
 
-    if COMPARE_WITH_NUMPY:
+    if INPUT_DTYPE == "bf16" and COMPARE_WITH_NUMPY:
+        # We need such workaround because `weight` actually contains bf16 data
+        MODEL_PATH = os.environ.get("MODEL_PATH")
+        CURRENT_NODE_NAME = os.environ.get("CURRENT_NODE_NAME")
+        import openvino as ov
+        model = ov.Core().read_model(MODEL_PATH)
+        name_to_node_mapping = {node.get_friendly_name(): node for node in model.get_ordered_ops()}
+        weight_node = name_to_node_mapping[CURRENT_NODE_NAME]
+        weight = Tensor(weight_node.get_data(dtype=np.float32))
+
+    if COMPARE_WITH_NUMPY and scale is None:
         if config.group_size != -1:
             # weights are reshaped from [a1, r, a2] to [a1, r//gs, gs, a2]
             weight, reduction_axes = reshape_weight_for_grouped_quantization(weight, reduction_axes, config.group_size)
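The debug path in these hunks is driven entirely by environment variables. A hedged configuration sketch (the variable names are the ones read above; the model path is hypothetical):

import os

os.environ["INPUT_DTYPE"] = "bf16"      # weights arrive as bf16 bits inside float16 arrays
os.environ["COMPARE_WITH_NUMPY"] = "1"  # enable the numpy cross-check branch
os.environ["MODEL_PATH"] = "model.xml"  # hypothetical: lets the check re-read fp32 weights
# CURRENT_NODE_NAME is set per weight inside transform_model (second hunk above),
# so the cross-check can look the node up by friendly name and upcast it to fp32.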