diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc
index b04431db4a..fb12f255d8 100644
--- a/src/Builder/OpBuildTable.inc
+++ b/src/Builder/OpBuildTable.inc
@@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18};
 op_dialect_version_map_["BitwiseXor"] = {18};
 op_dialect_version_map_["BlackmanWindow"] = {17};
 op_dialect_version_map_["Cast"] = {19};
-op_dialect_version_map_["CastLike"] = {15};
+op_dialect_version_map_["CastLike"] = {19};
 op_dialect_version_map_["CastMap"] = {1};
 op_dialect_version_map_["CategoryMapper"] = {1};
 op_dialect_version_map_["Ceil"] = {13};
@@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2};
 op_dialect_version_map_["Pow"] = {15};
 op_dialect_version_map_["QLinearConv"] = {10};
 op_dialect_version_map_["QLinearMatMul"] = {10};
-op_dialect_version_map_["QuantizeLinear"] = {13};
+op_dialect_version_map_["QuantizeLinear"] = {19};
 op_dialect_version_map_["RNN"] = {14};
 op_dialect_version_map_["RandomNormal"] = {1};
 op_dialect_version_map_["RandomNormalLike"] = {1};
diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc
index 03cdae080c..445f88e318 100644
--- a/src/Dialect/ONNX/ONNXOps.td.inc
+++ b/src/Dialect/ONNX/ONNXOps.td.inc
@@ -898,9 +898,10 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike",
   the same data type as the elements of the second input tensor.
   See documentation of the Cast operator for further details.
   }];
-  let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input,
-    AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type);
-  let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output);
+  let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input,
+    AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type,
+    DefaultValuedAttr<SI64Attr, "1">:$saturate);
+  let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output);
   let extraClassDeclaration = [{
     static int getNumberOfOperands() {
       return 2;
@@ -5685,15 +5686,20 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear",
   let description = [{
   The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor.
   The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization.
-  The quantization formula is y = saturate ((x / y_scale) + y_zero_point).
+  The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`.
   For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8.
-  For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type.
-  }];
-  let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x,
-    TensorOf<[F32]>:$y_scale,
-    AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point,
-    DefaultValuedAttr<SI64Attr, "1">:$axis);
-  let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y);
+  For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details.
+  'y_zero_point' and 'y' must have same type.
+  'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz,
+  but the quantization formula remains the same for consistency and
+  the type of the attribute 'y_zero_point' still determines the quantization type.
+  }];
+  let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x,
+    AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale,
+    AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point,
+    DefaultValuedAttr<SI64Attr, "1">:$axis,
+    DefaultValuedAttr<SI64Attr, "1">:$saturate);
+  let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y);
   let extraClassDeclaration = [{
     static int getNumberOfOperands() {
       return 3;
diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir
index eb7196d54a..c84805fa35 100644
--- a/test/mlir/onnx/onnx_shape_inference.mlir
+++ b/test/mlir/onnx/onnx_shape_inference.mlir
@@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) ->
   "onnx.Return"(%1) : (tensor<*xf16>) -> ()

   // CHECK-LABEL: test_castlike_1
-  // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16>
+  // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16>
   // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16>
 }

@@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor<f32>
   "onnx.Return"(%0) : (tensor<*xi8>) -> ()

   // CHECK-LABEL: test_quantize_linear_1
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<i8>) -> tensor<5x2x3x4xi8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<i8>) -> tensor<5x2x3x4xi8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8>
 }

@@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor<f32>
   "onnx.Return"(%0) : (tensor<*xui8>) -> ()

   // CHECK-LABEL: test_quantize_linear_2
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<5x2x3x4xui8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<5x2x3x4xui8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8>
 }

@@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor<f32>
   "onnx.Return"(%1) : (tensor<*xui8>) -> ()

   // CHECK-LABEL: test_quantize_linear_3
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, none) -> tensor<5x2x3x4xui8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, none) -> tensor<5x2x3x4xui8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8>
 }

diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
index 6b515dd8d9..8cd8468602 100644
--- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
+++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
@@ -24,10 +24,10 @@ myfun (x) => (y) {
 // CHECK-LABEL:  func.func @main_graph
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<f32>) -> tensor<f32> attributes {input_names = ["x"], output_names = ["y"]} {
 // CHECK:           [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor<f32>
-// CHECK:           [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+// CHECK:           [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = onnx.Constant {value_float = 1.000000e+00 : f32} : tensor<f32>
-// CHECK:           [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+// CHECK:           [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           onnx.Return [[VAR_6_]] : tensor<f32>
diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py
index 4e50c6a724..a793508896 100755
--- a/utils/gen_onnx_mlir.py
+++ b/utils/gen_onnx_mlir.py
@@ -98,7 +98,7 @@
     'BitwiseXor': [18],
     'BlackmanWindow': [17],
     'Cast': [19],
-    'CastLike': [15],
+    'CastLike': [19],
     'CastMap': [1],
     'CategoryMapper': [1],
     'Ceil': [13],
@@ -206,7 +206,7 @@
     'Pow': [15],
     'QLinearConv': [10],
     'QLinearMatMul': [10],
-    'QuantizeLinear': [13],
+    'QuantizeLinear': [19],
     'RNN': [14],
     'RandomNormal': [1],
    'RandomNormalLike': [1],