From 4d8d52d1969472e828974ca71c779bd4c8ad1dc5 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Tue, 23 Jul 2024 16:37:11 +0800 Subject: [PATCH 1/3] Fix default tuning config parameters Signed-off-by: Wang, Mengni --- onnx_neural_compressor/quantization/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/onnx_neural_compressor/quantization/config.py b/onnx_neural_compressor/quantization/config.py index ca00c9444..d182ca29d 100644 --- a/onnx_neural_compressor/quantization/config.py +++ b/onnx_neural_compressor/quantization/config.py @@ -1699,6 +1699,8 @@ def to_config_mapping(self, config_list: list = None, model_info: list = None) - def get_config_set_for_tuning( cls, quant_format=quantization.QuantFormat.QOperator, + activation_type=quantization.QuantType.QUInt8, + weight_type=quantization.QuantType.QInt8, execution_provider=None, op_types_to_quantize=None, nodes_to_exclude=None, @@ -1733,6 +1735,8 @@ def get_config_set_for_tuning( for item in op_type_candidate: cfg_lst.append( StaticQuantConfig( + activation_type=activation_type, + weight_type=weight_type, execution_provider=execution_provider, quant_format=quant_format, reduce_range=reduce_range, @@ -2126,6 +2130,7 @@ def to_config_mapping(self, config_list: list = None, model_info: list = None) - @classmethod def get_config_set_for_tuning( cls, + weight_type=quantization.QuantType.QInt8, execution_provider=None, op_types_to_quantize: List[str] = None, nodes_to_exclude: List[str] = None, @@ -2160,6 +2165,7 @@ def get_config_set_for_tuning( for item in op_type_candidate: cfg_lst.append( DynamicQuantConfig( + weight_type=weight_type, execution_provider=execution_provider, op_types_to_quantize=item, nodes_to_exclude=nodes_to_exclude, From 1838eafcecf2d03fc79ff29f581b08ec4e4bcfc4 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Tue, 23 Jul 2024 16:44:41 +0800 Subject: [PATCH 2/3] Update config.py Signed-off-by: Wang, Mengni --- onnx_neural_compressor/quantization/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnx_neural_compressor/quantization/config.py b/onnx_neural_compressor/quantization/config.py index d182ca29d..f4fe2672e 100644 --- a/onnx_neural_compressor/quantization/config.py +++ b/onnx_neural_compressor/quantization/config.py @@ -1525,7 +1525,7 @@ def __init__( calibration_data_reader: data_reader.CalibrationDataReader = None, calibrate_method=quantization.CalibrationMethod.MinMax, quant_format=quantization.QuantFormat.QOperator, - activation_type=quantization.QuantType.QInt8, + activation_type=quantization.QuantType.QUInt8, weight_type=quantization.QuantType.QInt8, op_types_to_quantize=None, nodes_to_quantize=None, From a6914e8e0a97c886953f4cb640b0049c6c00e4e1 Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Tue, 23 Jul 2024 21:45:44 +0800 Subject: [PATCH 3/3] Update direct_q8.py Signed-off-by: Wang, Mengni --- .../algorithms/post_training_quant/operators/direct_q8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py index 77d09793b..2a8cdd2d0 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py @@ -40,7 +40,7 @@ def quantize_check(self): def quantize(self): """Do quantizaion.""" node = self.node - self.quantizer.quantize_inputs(self.node, [0], direct_int8=True) + self.quantizer.quantize_inputs(self.node, [0], initializer_use_weight_qType=False, direct_int8=True) if not self.disable_qdq_for_node_output: self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant"