diff --git a/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py b/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py index 77d09793b..2a8cdd2d0 100644 --- a/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py +++ b/onnx_neural_compressor/algorithms/post_training_quant/operators/direct_q8.py @@ -40,7 +40,7 @@ def quantize_check(self): def quantize(self): """Do quantizaion.""" node = self.node - self.quantizer.quantize_inputs(self.node, [0], direct_int8=True) + self.quantizer.quantize_inputs(self.node, [0], initializer_use_weight_qType=False, direct_int8=True) if not self.disable_qdq_for_node_output: self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" diff --git a/onnx_neural_compressor/quantization/config.py b/onnx_neural_compressor/quantization/config.py index ca00c9444..f4fe2672e 100644 --- a/onnx_neural_compressor/quantization/config.py +++ b/onnx_neural_compressor/quantization/config.py @@ -1525,7 +1525,7 @@ def __init__( calibration_data_reader: data_reader.CalibrationDataReader = None, calibrate_method=quantization.CalibrationMethod.MinMax, quant_format=quantization.QuantFormat.QOperator, - activation_type=quantization.QuantType.QInt8, + activation_type=quantization.QuantType.QUInt8, weight_type=quantization.QuantType.QInt8, op_types_to_quantize=None, nodes_to_quantize=None, @@ -1699,6 +1699,8 @@ def to_config_mapping(self, config_list: list = None, model_info: list = None) - def get_config_set_for_tuning( cls, quant_format=quantization.QuantFormat.QOperator, + activation_type=quantization.QuantType.QUInt8, + weight_type=quantization.QuantType.QInt8, execution_provider=None, op_types_to_quantize=None, nodes_to_exclude=None, @@ -1733,6 +1735,8 @@ def get_config_set_for_tuning( for item in op_type_candidate: cfg_lst.append( StaticQuantConfig( + activation_type=activation_type, + weight_type=weight_type, execution_provider=execution_provider, quant_format=quant_format, reduce_range=reduce_range, @@ -2126,6 +2130,7 @@ def to_config_mapping(self, config_list: list = None, model_info: list = None) - @classmethod def get_config_set_for_tuning( cls, + weight_type=quantization.QuantType.QInt8, execution_provider=None, op_types_to_quantize: List[str] = None, nodes_to_exclude: List[str] = None, @@ -2160,6 +2165,7 @@ def get_config_set_for_tuning( for item in op_type_candidate: cfg_lst.append( DynamicQuantConfig( + weight_type=weight_type, execution_provider=execution_provider, op_types_to_quantize=item, nodes_to_exclude=nodes_to_exclude,