Skip to content

Commit

Permalink
BF16 support
Browse files Browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Sep 5, 2024
1 parent e6a1a4b commit b527cac
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/api/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def collect_api_entities() -> APIInfo:
"nncf.tensor.functions.numpy_linalg",
"nncf.tensor.functions.torch_numeric",
"nncf.tensor.functions.torch_linalg",
"nncf.tensor.functions.ov",
]

with mock(mock_modules):
Expand Down
5 changes: 4 additions & 1 deletion nncf/openvino/graph/node_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import Any, Callable, Dict, List, Optional, Tuple, Type

import numpy as np
Expand Down Expand Up @@ -116,6 +116,9 @@ def get_const_value(const_node: ov.Node) -> np.ndarray:
:return: The constant value.
"""
if const_node.get_element_type() == ov.Type.bf16:
INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
if INPUT_DTYPE == "bf16":
return ov.Tensor(const_node.output(0))
# Fixed FP32 data type as the result for BF16 constant
return const_node.get_data(dtype=np.float32)
return const_node.data
Expand Down
7 changes: 4 additions & 3 deletions nncf/openvino/quantization/compression_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,16 @@ def _build_compress_model(
invert_scale: Optional[bool] = False,
return_nodes: bool = False,
):
FP16_INPUT = bool(int(os.environ.get("FP16_INPUT", "0")))
INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
INT8_OUTPUT = bool(int(os.environ.get("INT8_OUTPUT", "0")))
SHARE_OUTPUTS = bool(int(os.environ.get("SHARE_OUTPUTS", "0")))

w = opset.parameter(weight_shape, name="w", dtype=np.float16 if FP16_INPUT else np.float32)
# The --input-dtype CLI option exports "fp16" through INPUT_DTYPE; the previous
# comparison against "f16" never matched it, so fp16 requests fell through to bf16.
# "f16" is still accepted so environments that already set it keep working.
input_dtype = (
    ov.Type.f32 if INPUT_DTYPE == "fp32" else ov.Type.f16 if INPUT_DTYPE in ("fp16", "f16") else ov.Type.bf16
)
w = opset.parameter(weight_shape, name="w", dtype=input_dtype)
s = opset.parameter(scale_shape, name="s")
parameters = [w, s]

if FP16_INPUT:
if input_dtype != ov.Type.f32:
w = opset.convert(w, ov.Type.f32)

compressed_w = w * (1 / s) if invert_scale else w / s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,15 +309,16 @@ def calculate_quantized_weight(
log_once(logging.INFO, "Compression time may improve after installing OpenVINO")

NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
if weight.backend == TensorBackend.numpy and is_openvino_available() and not NUMPY_COMPRESSION:
if weight.backend in [TensorBackend.numpy, TensorBackend.ov] and is_openvino_available() and not NUMPY_COMPRESSION:
from nncf.openvino.quantization.compression_primitives import OV_COMPRESSION_PRIMITIVE_CACHE

zero_point_shape = None if zero_point is None else zero_point.shape
compress_weight_primitive = OV_COMPRESSION_PRIMITIVE_CACHE.get_compress_weight_primitive(
config, weight.shape, scale.shape, zero_point_shape
)

assert weight.data.flags["C_CONTIGUOUS"]
if hasattr(weight.data, "flags"):
assert weight.data.flags["C_CONTIGUOUS"]
input_tensors = weight.data, scale.data
if zero_point is not None:
input_tensors += (zero_point.data,)
Expand Down Expand Up @@ -410,8 +411,8 @@ def do_int_quantization(
assert config.is_integer(), "The function supports integer quantization only"
group_size = config.group_size

FP16_INPUT = bool(int(os.environ.get("FP16_INPUT", "0")))
if weight.dtype != TensorDataType.float32 and not FP16_INPUT:
INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
if weight.dtype != TensorDataType.float32 and INPUT_DTYPE == "fp32":
weight = weight.astype(TensorDataType.float32)

if group_size != -1:
Expand Down
1 change: 1 addition & 0 deletions nncf/tensor/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class TensorBackend(Enum):

numpy = auto()
torch = auto()
ov = auto()


@dataclass
Expand Down
3 changes: 3 additions & 0 deletions nncf/tensor/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,8 @@ def _initialize_backends():
import nncf.tensor.functions.torch_linalg
import nncf.tensor.functions.torch_numeric # noqa: F401

with contextlib.suppress(ImportError):
import nncf.tensor.functions.ov # noqa: F401


_initialize_backends()
40 changes: 40 additions & 0 deletions nncf/tensor/functions/ov.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import numpy as np
import openvino as ov

from nncf.tensor import TensorDataType
from nncf.tensor.functions import numeric
from .numpy_numeric import DTYPE_MAP as NP_DTYPE_MAP
from ..definitions import TensorBackend

# Translation table from NNCF tensor data types to OpenVINO element types.
DTYPE_MAP = {
    TensorDataType.float16: ov.Type.f16,
    TensorDataType.bfloat16: ov.Type.bf16,
    TensorDataType.float32: ov.Type.f32,
    TensorDataType.float64: ov.Type.f64,
    TensorDataType.int8: ov.Type.i8,
    TensorDataType.int32: ov.Type.i32,
    TensorDataType.int64: ov.Type.i64,
    TensorDataType.uint8: ov.Type.u8,
}

# Inverse lookup: OpenVINO element type -> NNCF tensor data type.
DTYPE_MAP_REV = {ov_type: nncf_type for nncf_type, ov_type in DTYPE_MAP.items()}


@numeric.backend.register(ov.Tensor)
def _(a: ov.Tensor) -> TensorBackend:
    """Report ``TensorBackend.ov`` as the backend of any ``ov.Tensor``.

    :param a: The OpenVINO tensor; it is used only for dispatch.
    :return: ``TensorBackend.ov``.
    """
    return TensorBackend.ov


@numeric.astype.register(ov.Tensor)
def _(a: ov.Tensor, dtype: TensorDataType) -> np.ndarray:
    """Cast the data of an ``ov.Tensor`` to the requested data type.

    The target type is looked up in the NumPy backend's ``DTYPE_MAP`` and the
    cast is delegated to ``a.data.astype``, so the result is whatever that call
    returns (annotated as ``np.ndarray``) rather than another ``ov.Tensor``.

    :param a: The OpenVINO tensor to convert.
    :param dtype: The requested NNCF tensor data type.
    :return: The converted data.
    """
    cast = a.data.astype
    return cast(NP_DTYPE_MAP[dtype])


@numeric.dtype.register(ov.Tensor)
def _(a: ov.Tensor) -> TensorDataType:
    """Translate the tensor's OpenVINO element type into a ``TensorDataType``.

    :param a: The OpenVINO tensor to inspect.
    :return: The matching entry of ``DTYPE_MAP_REV``.
    :raises KeyError: If the element type has no entry in ``DTYPE_MAP_REV``.
    """
    element_type = a.get_element_type()
    return DTYPE_MAP_REV[element_type]


@numeric.size.register(ov.Tensor)
def _(a: ov.Tensor) -> int:
    """Return the ``size`` attribute of an ``ov.Tensor``.

    :param a: The OpenVINO tensor to inspect.
    :return: The value of ``a.size``.
    """
    tensor_size = a.size
    return tensor_size
12 changes: 7 additions & 5 deletions weight_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def parse_arguments():

parser.add_argument("--dynamic-compression", action="store_true", help="Enable dynamic compression")

parser.add_argument("--fp16-input", action="store_true", help="Enable FP16 input mode")
parser.add_argument("--input-dtype", type=str, choices=["fp32", "fp16", "bf16"], default="fp32", help="OV model input dtype")

parser.add_argument("--bf16-input", action="store_true", help="Enable BF16 input mode")

parser.add_argument("--int8-output", action="store_true", help="Output in int8")

Expand All @@ -61,7 +63,7 @@ def main(args):

numpy_compression = args.numpy_compression
dynamic_compression = args.dynamic_compression
fp16_input = args.fp16_input
input_dtype = args.input_dtype
int8_output = args.int8_output
recompile = args.recompile
share_outputs = args.share_outputs
Expand All @@ -71,7 +73,7 @@ def main(args):
else:
log_dir_suffix = "ov-dynamic" if dynamic_compression else "ov-static"
log_dir_suffix = f"{log_dir_suffix}_{('output-int8' if int8_output else 'output-fp32')}"
log_dir_suffix = f"{log_dir_suffix}_{('input-fp16' if fp16_input else 'input-fp32')}"
log_dir_suffix = f"{log_dir_suffix}_{f'input-{input_dtype}'}"
if recompile:
log_dir_suffix = f"{log_dir_suffix}_recompile"
if share_outputs:
Expand All @@ -89,7 +91,7 @@ def main(args):

os.environ["NUMPY_COMPRESSION"] = f"{int(numpy_compression)}"
os.environ["DYNAMIC_COMPRESSION"] = f"{int(dynamic_compression)}"
os.environ["FP16_INPUT"] = f"{int(fp16_input)}"
os.environ["INPUT_DTYPE"] = input_dtype
os.environ["INT8_OUTPUT"] = f"{int(int8_output)}"
os.environ["RECOMPILE"] = f"{int(recompile)}"
os.environ["SHARE_OUTPUTS"] = f"{int(share_outputs)}"
Expand Down Expand Up @@ -149,7 +151,7 @@ def main(args):
f"{model_path},"
f"{numpy_compression},"
f"{'-' if numpy_compression else 'Dynamic' if dynamic_compression else 'Static'},"
f"{'-' if numpy_compression else 'FP16' if fp16_input else 'FP32'},"
f"{'-' if numpy_compression else input_dtype.upper()},"
f"{'-' if numpy_compression else 'INT8' if int8_output else 'FP32'},"
f"{compression_time:.2f},"
f"{peak_memory:.2f},"
Expand Down

0 comments on commit b527cac

Please sign in to comment.