Skip to content

Commit

Permalink
INT4 experiments
Browse files (browse the repository at this point in the history)
  • Loading branch information
nikita-savelyevv committed Sep 26, 2024
1 parent 43967ab commit c9569bb
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 306 deletions.
3 changes: 2 additions & 1 deletion nncf/openvino/graph/node_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ def get_const_value(const_node: ov.Node) -> np.ndarray:
:return: The constant value.
"""
INPUT_DTYPE = os.environ.get("INPUT_DTYPE", "fp32")
if const_node.get_element_type() == ov.Type.bf16 and INPUT_DTYPE != "bf16":
NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
if const_node.get_element_type() == ov.Type.bf16 and (INPUT_DTYPE != "bf16" or NUMPY_COMPRESSION):
# Fixed FP32 data type as the result for BF16 constant
return const_node.get_data(dtype=np.float32)
return const_node.data
Expand Down
2 changes: 2 additions & 0 deletions nncf/openvino/quantization/compression_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,13 @@ def _get_compress_model(
num_bits = config.num_bits
if config.mode in [CompressWeightsMode.INT8_ASYM, config.mode.INT4_ASYM]:
dtype = ov.Type.u8
# dtype = ov.Type.u8 if config.mode == CompressWeightsMode.INT8_ASYM else ov.Type.u4
level_low = 0
level_high = 2**num_bits - 1
compressed_w += opset.convert(zp, ov.Type.f32)
elif config.mode in [CompressWeightsMode.INT8_SYM, config.mode.INT4_SYM]:
dtype = ov.Type.i8
# dtype = ov.Type.i8 if config.mode == CompressWeightsMode.INT8_SYM else ov.Type.u4
level_low = -(2 ** (num_bits - 1))
level_high = 2 ** (num_bits - 1) - 1
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Dict, Iterable, List, Optional, Tuple

import openvino as ov
Expand Down Expand Up @@ -226,6 +227,15 @@ def _create_compression_subgraph(
original_shape = weight.shape
compressed_weight = compress_weight(weight, reduction_axes, compression_config, layer_scales, layer_zero_points)

# NUMPY_COMPRESSION = bool(int(os.environ.get("NUMPY_COMPRESSION", "0")))
# FP32_OUTPUT = bool(int(os.environ.get("FP32_OUTPUT", "0")))
# if compression_config.mode in [CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT4_SYM] and not NUMPY_COMPRESSION and not FP32_OUTPUT:
# flat_compressed_weight = ov.Tensor(compressed_weight.tensor.data, (compressed_weight.tensor.shape[0]*2, ), compression_dtype)
# compressed_const = opset.constant(flat_compressed_weight)
# compressed_shape = compressed_weight.scale.shape[:2] + (compression_config.group_size,)
# compressed_const = opset.reshape(compressed_const, compressed_shape)
# else:
# compressed_const = opset.constant(compressed_weight.tensor.data, dtype=compression_dtype, name=const_node_name)
compressed_const = opset.constant(compressed_weight.tensor.data, dtype=compression_dtype, name=const_node_name)
converted_const = opset.convert(compressed_const, ov.Type.f16)
if compressed_weight.zero_point is not None and compressed_weight.tensor.dtype == TensorDataType.uint8:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,15 +350,27 @@ def calculate_quantized_weight(
ov_compression = weight.backend in [TensorBackend.numpy, TensorBackend.ov] and is_openvino_available() and not NUMPY_COMPRESSION
compressed_weights_ov, scale_ov, zero_point_ov = None, None, None
if ov_compression:
import openvino as ov
from nncf.openvino.quantization.compression_primitives import OV_COMPRESSION_PRIMITIVE_CACHE

# if INPUT_DTYPE == "bf16":
# assert weight.dtype == TensorDataType.float16
# input_dtype = ov.Type.bf16
# elif weight.dtype == TensorDataType.float16:
# input_dtype = ov.Type.f16
# elif weight.dtype == TensorDataType.float32:
# input_dtype = ov.Type.f32
# else:
# raise Exception
# input_tensors = (ov.Tensor(weight.data, weight.data.shape, input_dtype),)

if INPUT_DTYPE == "bf16":
import openvino as ov
assert weight.data.dtype == np.float16
weight_data = ov.Tensor(weight.data, weight.data.shape, ov.Type.bf16)
else:
weight_data = weight.data
input_tensors = (weight_data,)

if not END_TO_END_COMPRESSION:
zero_point_shape = None if zero_point is None else zero_point.shape
compiled_model, compress_weight_primitive = OV_COMPRESSION_PRIMITIVE_CACHE.get_compress_weight_primitive(
Expand Down
Loading

0 comments on commit c9569bb

Please sign in to comment.