From 1b2987b38f44069c73bec3f2e5b7e27f9972dace Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Tue, 5 Sep 2023 14:01:18 -0400 Subject: [PATCH 01/13] Add support for string data type Signed-off-by: Megan Hampton --- src/Runtime/OMUnique.inc | 3 ++- src/Runtime/PyExecutionSessionBase.cpp | 3 ++- src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Runtime/OMUnique.inc b/src/Runtime/OMUnique.inc index 89f7952993..cdef189f99 100644 --- a/src/Runtime/OMUnique.inc +++ b/src/Runtime/OMUnique.inc @@ -94,7 +94,8 @@ int isLessNum(void *arg1, void *arg2, OM_DATA_TYPE dataType) { return *((int32_t *)arg1) < *((int32_t *)arg2); case ONNX_TYPE_INT64: return *((int64_t *)arg1) < *((int64_t *)arg2); - // case ONNX_TYPE_STRING: + case ONNX_TYPE_STRING: + return *((const char **)arg1) < *((const char **)arg2); case ONNX_TYPE_BOOL: return *((bool *)arg1) < *((bool *)arg2); // case ONNX_TYPE_FLOAT16: diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 4cd1d62a0d..7db2a6947a 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,7 +105,8 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - // string type missing + else if (py::isinstance>(inputPyArray)) + dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; else if (py::isinstance>(inputPyArray)) diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index f671f20d2b..b189555ae2 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -95,6 +95,7 @@ private static class Data { put("f2", OMTensor.ONNX_TYPE_FLOAT16); put("f4", OMTensor.ONNX_TYPE_FLOAT); put("f8", OMTensor.ONNX_TYPE_DOUBLE); + put("S", OMTensor.ONNX_TYPE_STRING); }}; private static final HashMap onnx2numpyType = @@ -111,6 +112,9 @@ private static class Data { put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian+"f2"); put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian+"f4"); put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian+"f8"); + // numpy documentation: datatype S is zero-terminated bytes (not recommended) + // https://numpy.org/doc/stable/reference/arrays.dtypes.html + put(OMTensor.ONNX_TYPE_STRING, "|S"); }}; private static OMTensor createTensor(String buffer, long[] shape, String dtype) { From 7acc6f5bfc5ffb593808155aa3963fcf6bda2a4e Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Tue, 5 Sep 2023 14:08:28 -0400 Subject: [PATCH 02/13] Fix format Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 7db2a6947a..f6c72d5fae 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,7 +105,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - else if (py::isinstance>(inputPyArray)) + else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; From 563d95f4bab0b3222a9251a357615d4f1d526c57 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Tue, 5 Sep 2023 14:11:47 -0400 Subject: [PATCH 03/13] Linter Signed-off-by: Megan Hampton --- 
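Note on the ONNX_TYPE_STRING case added to isLessNum() in patch 01: comparing two `const char *` values with `<` orders the elements by pointer address, not by string contents, so the result depends on where the strings happen to live in memory. A minimal sketch of a content-based comparison, assuming each string element is a NUL-terminated C string (the helper name isLessStr is hypothetical, not part of the runtime):

  #include <cstring>

  // Sketch: lexicographic ordering for ONNX_TYPE_STRING elements.
  // strcmp compares character contents; comparing the pointers
  // themselves would only compare addresses.
  static int isLessStr(void *arg1, void *arg2) {
    const char *s1 = *(const char **)arg1;
    const char *s2 = *(const char **)arg2;
    return strcmp(s1, s2) < 0;
  }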
.../jni/src/com/ibm/onnxmlir/OMRunner.java | 402 +++++++++--------- 1 file changed, 206 insertions(+), 196 deletions(-) diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index b189555ae2..bdc70dc7ec 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -41,211 +41,221 @@ * output tensors converted from Java HashMap. The tensor * data are base64 encoded. */ -public class OMRunner -{ - private static class Data { - /* For Jsoniter, field names match the keys in the JSON */ - String buffer; - String dtype; - long[] shape; - - /* For Jackson, words after removing get/set of getter/setter - * methods with the first letter lowercased match the keys in - * the JSON. - */ - /* - String getBuffer() { return this.buffer; } - void setBuffer(String buffer) { this.buffer = buffer; } - - String getDtype() { return this.dtype; } - void setDtype(String dtype) { this.dtype = dtype; } - - long[] getShape() { return this.shape; } - void setShape(long[] shape) { this.shape = shape; } - */ - } - - private static final HashMap numpy2javaEndian = - new HashMap() {{ - put(">", ByteOrder.BIG_ENDIAN); - put("<", ByteOrder.LITTLE_ENDIAN); - put("=", ByteOrder.nativeOrder()); - put("|", ByteOrder.nativeOrder()); - }}; - - private static final HashMap java2numpyEndian = - new HashMap() {{ - put(ByteOrder.BIG_ENDIAN, ">");; - put(ByteOrder.LITTLE_ENDIAN, "<");; - }}; - private static final String numpyEndian = - java2numpyEndian.get(ByteOrder.nativeOrder()); - - private static final HashMap numpy2onnxType = - new HashMap() {{ - put("b1", OMTensor.ONNX_TYPE_BOOL); - put("i1", OMTensor.ONNX_TYPE_INT8); - put("u1", OMTensor.ONNX_TYPE_UINT8); - put("i2", OMTensor.ONNX_TYPE_INT16); - put("u2", OMTensor.ONNX_TYPE_UINT16); - put("i4", OMTensor.ONNX_TYPE_INT32); - put("u4", OMTensor.ONNX_TYPE_UINT32); - put("i8", OMTensor.ONNX_TYPE_INT64); - put("u8", OMTensor.ONNX_TYPE_UINT64); - put("f2", OMTensor.ONNX_TYPE_FLOAT16); - put("f4", OMTensor.ONNX_TYPE_FLOAT); - put("f8", OMTensor.ONNX_TYPE_DOUBLE); - put("S", OMTensor.ONNX_TYPE_STRING); - }}; - - private static final HashMap onnx2numpyType = - new HashMap() {{ - put(OMTensor.ONNX_TYPE_BOOL, "|b1"); - put(OMTensor.ONNX_TYPE_INT8, "|i1"); - put(OMTensor.ONNX_TYPE_UINT8, "|u1"); - put(OMTensor.ONNX_TYPE_INT16, numpyEndian+"i2"); - put(OMTensor.ONNX_TYPE_UINT16, numpyEndian+"u2"); - put(OMTensor.ONNX_TYPE_INT32, numpyEndian+"i4"); - put(OMTensor.ONNX_TYPE_UINT32, numpyEndian+"u4"); - put(OMTensor.ONNX_TYPE_INT64, numpyEndian+"i8"); - put(OMTensor.ONNX_TYPE_UINT64, numpyEndian+"u8"); - put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian+"f2"); - put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian+"f4"); - put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian+"f8"); - // numpy documentation: datatype S is zero-terminated bytes (not recommended) - // https://numpy.org/doc/stable/reference/arrays.dtypes.html - put(OMTensor.ONNX_TYPE_STRING, "|S"); - }}; - - private static OMTensor createTensor(String buffer, long[] shape, String dtype) { - /* We need a ByteBuffer for OMTensor but ByteBuffer.wrap(bytes) - * does NOT work. Because wrap simply creates a "view" of the - * byte[] as ByteBuffer. The backing byte[] is a Java object - * but the JNI wrapper is expecting a real direct ByteBuffer - * to hold the data to be given to the native code. 
- */ - byte[] bytes = Base64.getDecoder().decode(buffer); - ByteBuffer data = ByteBuffer.allocateDirect(bytes.length); - data.put(bytes); +public class OMRunner { + private static class Data { + /* For Jsoniter, field names match the keys in the JSON */ + String buffer; + String dtype; + long[] shape; + + /* + * For Jackson, words after removing get/set of getter/setter + * methods with the first letter lowercased match the keys in + * the JSON. + */ + /* + * String getBuffer() { return this.buffer; } + * void setBuffer(String buffer) { this.buffer = buffer; } + * + * String getDtype() { return this.dtype; } + * void setDtype(String dtype) { this.dtype = dtype; } + * + * long[] getShape() { return this.shape; } + * void setShape(long[] shape) { this.shape = shape; } + */ + } - String e = dtype.substring(0, 1); - ByteOrder endian = numpy2javaEndian.get(e); + private static final HashMap numpy2javaEndian = new HashMap() { + { + put(">", ByteOrder.BIG_ENDIAN); + put("<", ByteOrder.LITTLE_ENDIAN); + put("=", ByteOrder.nativeOrder()); + put("|", ByteOrder.nativeOrder()); + } + }; - String t = dtype.substring(1); - Integer otype = numpy2onnxType.get(t); + private static final HashMap java2numpyEndian = new HashMap() { + { + put(ByteOrder.BIG_ENDIAN, ">"); + ; + put(ByteOrder.LITTLE_ENDIAN, "<"); + ; + } + }; + private static final String numpyEndian = java2numpyEndian.get(ByteOrder.nativeOrder()); - return new OMTensor(data, shape, endian, - otype == null ? -1 : otype.intValue()); - } + private static final HashMap numpy2onnxType = new HashMap() { + { + put("b1", OMTensor.ONNX_TYPE_BOOL); + put("i1", OMTensor.ONNX_TYPE_INT8); + put("u1", OMTensor.ONNX_TYPE_UINT8); + put("i2", OMTensor.ONNX_TYPE_INT16); + put("u2", OMTensor.ONNX_TYPE_UINT16); + put("i4", OMTensor.ONNX_TYPE_INT32); + put("u4", OMTensor.ONNX_TYPE_UINT32); + put("i8", OMTensor.ONNX_TYPE_INT64); + put("u8", OMTensor.ONNX_TYPE_UINT64); + put("f2", OMTensor.ONNX_TYPE_FLOAT16); + put("f4", OMTensor.ONNX_TYPE_FLOAT); + put("f8", OMTensor.ONNX_TYPE_DOUBLE); + put("S", OMTensor.ONNX_TYPE_STRING); + } + }; - private static HashMap encodeTensor(OMTensor omt) throws Exception { - /* We need a byte[] for base64 encode but buffer.array() - * does NOT work. Because the buffer is backed by JNI code - * generated array, not a Java byte[] object. So base64 - * encode results in UnsupportedOperationException. - */ - ByteBuffer buffer = omt.getData(); - byte[] bytes = new byte[buffer.limit()]; - buffer.get(bytes); - - String dtype = onnx2numpyType.get(omt.getDataType()); - - HashMap map = new HashMap(); - map.put("buffer", Base64.getEncoder().encodeToString(bytes/*buffer.array()*/)); - map.put("dtype", dtype); - map.put("shape", omt.getShape()); - return map; - } - - /* Model inputs are read from stdin encoded in JSON. 
This routine will - * - * - read JSON from stdin - * - decode JSON array with Jsoniter - * - call createTensor to create an OMTensor from each object - * - construct the OMTensorList to be fed into mainGraph - */ - private static OMTensorList readStdin() throws Exception { - BufferedReader stdin = - new BufferedReader(new InputStreamReader(System.in)); - ArrayList omtl = new ArrayList(); - - JsonIterator json = JsonIterator.parse(stdin.readLine()); - int count = 0; - while(json.readArray()) { - Data data = json.read(Data.class); - OMTensor omt = createTensor(data.buffer, data.shape, data.dtype); - omtl.add(omt); - count++; + private static final HashMap onnx2numpyType = new HashMap() { + { + put(OMTensor.ONNX_TYPE_BOOL, "|b1"); + put(OMTensor.ONNX_TYPE_INT8, "|i1"); + put(OMTensor.ONNX_TYPE_UINT8, "|u1"); + put(OMTensor.ONNX_TYPE_INT16, numpyEndian + "i2"); + put(OMTensor.ONNX_TYPE_UINT16, numpyEndian + "u2"); + put(OMTensor.ONNX_TYPE_INT32, numpyEndian + "i4"); + put(OMTensor.ONNX_TYPE_UINT32, numpyEndian + "u4"); + put(OMTensor.ONNX_TYPE_INT64, numpyEndian + "i8"); + put(OMTensor.ONNX_TYPE_UINT64, numpyEndian + "u8"); + put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian + "f2"); + put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian + "f4"); + put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian + "f8"); + // numpy documentation: datatype S is zero-terminated bytes (not recommended) + // https://numpy.org/doc/stable/reference/arrays.dtypes.html + put(OMTensor.ONNX_TYPE_STRING, "|S"); + } + }; + + private static OMTensor createTensor(String buffer, long[] shape, String dtype) { + /* + * We need a ByteBuffer for OMTensor but ByteBuffer.wrap(bytes) + * does NOT work. Because wrap simply creates a "view" of the + * byte[] as ByteBuffer. The backing byte[] is a Java object + * but the JNI wrapper is expecting a real direct ByteBuffer + * to hold the data to be given to the native code. + */ + byte[] bytes = Base64.getDecoder().decode(buffer); + ByteBuffer data = ByteBuffer.allocateDirect(bytes.length); + data.put(bytes); + + String e = dtype.substring(0, 1); + ByteOrder endian = numpy2javaEndian.get(e); + + String t = dtype.substring(1); + Integer otype = numpy2onnxType.get(t); + + return new OMTensor(data, shape, endian, + otype == null ? -1 : otype.intValue()); } - OMTensor[] omts = new OMTensor[count]; - return new OMTensorList(omtl.toArray(omts)); - } - - /* Model inputs are read from stdin encoded in JSON. This routine will - * - * - read JSON from stdin - * - decode JSON array with Jackson - * - call createTensor to create an OMTensor from each object - * - construct the OMTensorList to be fed into mainGraph - */ - /* - private static OMTensorList readStdin2() throws Exception { - ObjectMapper om = new ObjectMapper(); - Data[] data = om.readValue(System.in, Data[].class); - OMTensor[] omts = new OMTensor[data.length]; - for (int i = 0; i < data.length; i++) { - omts[i] = createTensor(data[i].buffer, data[i].shape, data[i].dtype); + + private static HashMap encodeTensor(OMTensor omt) throws Exception { + /* + * We need a byte[] for base64 encode but buffer.array() + * does NOT work. Because the buffer is backed by JNI code + * generated array, not a Java byte[] object. So base64 + * encode results in UnsupportedOperationException. 
+ */ + ByteBuffer buffer = omt.getData(); + byte[] bytes = new byte[buffer.limit()]; + buffer.get(bytes); + + String dtype = onnx2numpyType.get(omt.getDataType()); + + HashMap map = new HashMap(); + map.put("buffer", Base64.getEncoder().encodeToString(bytes/* buffer.array() */)); + map.put("dtype", dtype); + map.put("shape", omt.getShape()); + return map; } - return new OMTensorList(omts); - } - */ - - /* Model outputs are written to stdout encoded in JSON. This routine will - * - * - loop through tensors in the OMTensorList returned from mainGraph - * - call encodeTensor to create a list of HashMap from each OMTensor - * - encode HashMap list into JSON with Jsoniter - * - write JSON to stdout - */ - private static void writeStdout(OMTensorList output) throws Exception { - ArrayList> list = new ArrayList>(); - HashMap map = new HashMap(); - OMTensor[] omts = output.getOmtArray(); - - for (int i = 0; i < omts.length; i++) { - list.add(encodeTensor(omts[i])); + + /* + * Model inputs are read from stdin encoded in JSON. This routine will + * + * - read JSON from stdin + * - decode JSON array with Jsoniter + * - call createTensor to create an OMTensor from each object + * - construct the OMTensorList to be fed into mainGraph + */ + private static OMTensorList readStdin() throws Exception { + BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in)); + ArrayList omtl = new ArrayList(); + + JsonIterator json = JsonIterator.parse(stdin.readLine()); + int count = 0; + while (json.readArray()) { + Data data = json.read(Data.class); + OMTensor omt = createTensor(data.buffer, data.shape, data.dtype); + omtl.add(omt); + count++; + } + OMTensor[] omts = new OMTensor[count]; + return new OMTensorList(omtl.toArray(omts)); } - BufferedWriter stdout = - new BufferedWriter(new OutputStreamWriter(System.out)); - stdout.write(JsonStream.serialize(list)); - stdout.flush(); - } - - /* Model outputs are written to stdout encoded in JSON. This routine will - * - * - loop through tensors in the OMTensorList returned from mainGraph - * - call encodeTensor to create a list of HashMap from each OMTensor - * - encode HashMap list into JSON with Jackson - * - write JSON to stdout - */ - /* - private static void writeStdout2(OMTensorList output) throws Exception { - ArrayList> list = new ArrayList>(); - HashMap map = new HashMap(); - OMTensor[] omts = output.getOmtArray(); - - for (int i = 0; i < omts.length; i++) { - list.add(encodeTensor(omts[i])); + /* + * Model inputs are read from stdin encoded in JSON. This routine will + * + * - read JSON from stdin + * - decode JSON array with Jackson + * - call createTensor to create an OMTensor from each object + * - construct the OMTensorList to be fed into mainGraph + */ + /* + * private static OMTensorList readStdin2() throws Exception { + * ObjectMapper om = new ObjectMapper(); + * Data[] data = om.readValue(System.in, Data[].class); + * OMTensor[] omts = new OMTensor[data.length]; + * for (int i = 0; i < data.length; i++) { + * omts[i] = createTensor(data[i].buffer, data[i].shape, data[i].dtype); + * } + * return new OMTensorList(omts); + * } + */ + + /* + * Model outputs are written to stdout encoded in JSON. 
This routine will + * + * - loop through tensors in the OMTensorList returned from mainGraph + * - call encodeTensor to create a list of HashMap from each OMTensor + * - encode HashMap list into JSON with Jsoniter + * - write JSON to stdout + */ + private static void writeStdout(OMTensorList output) throws Exception { + ArrayList> list = new ArrayList>(); + HashMap map = new HashMap(); + OMTensor[] omts = output.getOmtArray(); + + for (int i = 0; i < omts.length; i++) { + list.add(encodeTensor(omts[i])); + } + + BufferedWriter stdout = new BufferedWriter(new OutputStreamWriter(System.out)); + stdout.write(JsonStream.serialize(list)); + stdout.flush(); } - ObjectMapper om = new ObjectMapper(); - om.writeValue(System.out, list); - } - */ + /* + * Model outputs are written to stdout encoded in JSON. This routine will + * + * - loop through tensors in the OMTensorList returned from mainGraph + * - call encodeTensor to create a list of HashMap from each OMTensor + * - encode HashMap list into JSON with Jackson + * - write JSON to stdout + */ + /* + * private static void writeStdout2(OMTensorList output) throws Exception { + * ArrayList> list = new ArrayList>(); + * HashMap map = new HashMap(); + * OMTensor[] omts = output.getOmtArray(); + * + * for (int i = 0; i < omts.length; i++) { + * list.add(encodeTensor(omts[i])); + * } + * + * ObjectMapper om = new ObjectMapper(); + * om.writeValue(System.out, list); + * } + */ - /* Read inputs from stdin, call mainGraph, write outputs to stdout */ - public static void main(String[] args) throws Exception { - writeStdout(OMModel.mainGraph(readStdin())); - } + /* Read inputs from stdin, call mainGraph, write outputs to stdout */ + public static void main(String[] args) throws Exception { + writeStdout(OMModel.mainGraph(readStdin())); + } } From f7e34614520be372ebb6ed4546a615cc678e26e9 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 10:18:02 -0400 Subject: [PATCH 04/13] Change to unicode string Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 2 +- src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index f6c72d5fae..536b0bec81 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -187,7 +187,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = py::dtype("int64"); break; case (OM_DATA_TYPE)onnx::TensorProto::STRING: - dtype = py::dtype("str"); + dtype = py::dtype("str_"); break; case (OM_DATA_TYPE)onnx::TensorProto::BOOL: dtype = py::dtype("bool_"); diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index bdc70dc7ec..de7a5df009 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -98,7 +98,7 @@ private static class Data { put("f2", OMTensor.ONNX_TYPE_FLOAT16); put("f4", OMTensor.ONNX_TYPE_FLOAT); put("f8", OMTensor.ONNX_TYPE_DOUBLE); - put("S", OMTensor.ONNX_TYPE_STRING); + put("U25", OMTensor.ONNX_TYPE_STRING); } }; @@ -116,9 +116,9 @@ private static class Data { put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian + "f2"); put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian + "f4"); put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian + "f8"); - // numpy documentation: datatype S is zero-terminated bytes (not recommended) + // Unicode string: 25-character string // 
https://numpy.org/doc/stable/reference/arrays.dtypes.html - put(OMTensor.ONNX_TYPE_STRING, "|S"); + put(OMTensor.ONNX_TYPE_STRING, "|U25"); } }; From 9166c2878faa117195021e71e3c63f2fefba921b Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 12:16:23 -0400 Subject: [PATCH 05/13] Keep trying Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 4 ++-- src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 536b0bec81..3ef6e60013 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,7 +105,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - else if (py::isinstance>(inputPyArray)) + else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; @@ -187,7 +187,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = py::dtype("int64"); break; case (OM_DATA_TYPE)onnx::TensorProto::STRING: - dtype = py::dtype("str_"); + dtype = py::dtype("str"); break; case (OM_DATA_TYPE)onnx::TensorProto::BOOL: dtype = py::dtype("bool_"); diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index de7a5df009..707ad274c1 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -116,7 +116,7 @@ private static class Data { put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian + "f2"); put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian + "f4"); put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian + "f8"); - // Unicode string: 25-character string + // numpy documentation: Unicode string // https://numpy.org/doc/stable/reference/arrays.dtypes.html put(OMTensor.ONNX_TYPE_STRING, "|U25"); } From c9b48c433596adb316f849b09c362dee0890f2a2 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 12:44:29 -0400 Subject: [PATCH 06/13] Test Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 3ef6e60013..1895a5ede6 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,8 +105,8 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - else if (py::isinstance>(inputPyArray)) - dtype = ONNX_TYPE_STRING; + // else if (py::isinstance>(inputPyArray)) + // dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; else if (py::isinstance>(inputPyArray)) From ebd81ba3477abbe854bf51b19527fa813a5a9911 Mon Sep 17 00:00:00 2001 From: gongsu832 Date: Tue, 5 Sep 2023 17:58:02 -0400 Subject: [PATCH 07/13] Do not remove LLVM "internal" options (#2475) * - Do not remove LLVM "internal" options such as --debug - Clean up some header includes and library link dependencies Signed-off-by: Gong Su * Clang format Signed-off-by: Gong Su * Fix general options not showing with --help-hidden|--help-list-hidden Signed-off-by: Gong Su --------- Signed-off-by: Gong Su Signed-off-by: Megan Hampton --- src/Compiler/CMakeLists.txt | 6 +----- src/Compiler/CompilerOptions.cpp | 8 +++++++- src/Compiler/CompilerUtils.cpp | 1 - 
src/Compiler/OnnxMlirCompiler.cpp | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Compiler/CMakeLists.txt b/src/Compiler/CMakeLists.txt index 33abd59397..1b086237be 100644 --- a/src/Compiler/CMakeLists.txt +++ b/src/Compiler/CMakeLists.txt @@ -152,7 +152,7 @@ add_onnx_mlir_library(OMCompilerUtils EXCLUDE_FROM_OM_LIBS DEPENDS - ExternalUtil + ExternalUtil llc opt @@ -163,12 +163,10 @@ add_onnx_mlir_library(OMCompilerUtils ${ONNX_MLIR_SRC_ROOT}/include LINK_LIBS PUBLIC - ${OMLibs} OMCompilerDialects OMCompilerPasses OMAccelerator OMVersion - MLIRIR # Link LLVM libraries necessary to query which target architectures # are configured. @@ -193,7 +191,6 @@ add_onnx_mlir_library(OMCompiler DEPENDS OMCompilerUtils - ExternalUtil INCLUDE_DIRS PRIVATE ${FILE_GENERATE_DIR} @@ -204,7 +201,6 @@ add_onnx_mlir_library(OMCompiler EXCLUDE_FROM_OM_LIBS LINK_LIBS PRIVATE - OMCompilerDialects OMCompilerUtils ) diff --git a/src/Compiler/CompilerOptions.cpp b/src/Compiler/CompilerOptions.cpp index e0dfe37a0f..2d6e78964b 100644 --- a/src/Compiler/CompilerOptions.cpp +++ b/src/Compiler/CompilerOptions.cpp @@ -1010,7 +1010,13 @@ std::string getToolPath( // result in a unknown option error. void removeUnrelatedOptions( const std::vector Categories) { - llvm::cl::HideUnrelatedOptions(Categories); + // Do not remove LLVM "internal" options such as --debug + // that do not have a category (and therefore placed + // under the general category). So we add the general + // category to the list of not-really-hidden options. + std::vector optCategories(Categories); + optCategories.push_back(&llvm::cl::getGeneralCategory()); + llvm::cl::HideUnrelatedOptions(optCategories); llvm::StringMap &optMap = llvm::cl::getRegisteredOptions(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 44c9dbefa0..284e717b35 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -39,7 +39,6 @@ #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerPasses.hpp" #include "src/Compiler/HeapReporter.hpp" -#include "src/Dialect/ONNX/ONNXDialect.hpp" #include "src/Version/Version.hpp" #include diff --git a/src/Compiler/OnnxMlirCompiler.cpp b/src/Compiler/OnnxMlirCompiler.cpp index 2cd345c214..a1345e7c49 100644 --- a/src/Compiler/OnnxMlirCompiler.cpp +++ b/src/Compiler/OnnxMlirCompiler.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include "include/OnnxMlirCompiler.h" -#include "src/Compiler/CompilerDialects.hpp" #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerUtils.hpp" #include "llvm/Support/FileSystem.h" From a05646b0ca7eae64c7743b1cac6201eec0c8ab47 Mon Sep 17 00:00:00 2001 From: Philip Lassen Date: Tue, 5 Sep 2023 17:27:14 -0700 Subject: [PATCH 08/13] Add frontend support for saturate for CastLike and QuantizeLinear (#2480) Signed-off-by: philass Co-authored-by: Soren Lassen Signed-off-by: Megan Hampton --- src/Builder/OpBuildTable.inc | 4 +-- src/Dialect/ONNX/ONNXOps.td.inc | 28 +++++++++++-------- test/mlir/onnx/onnx_shape_inference.mlir | 8 +++--- .../functiontest_attrwithdefault.onnxtext | 4 +-- utils/gen_onnx_mlir.py | 4 +-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index b04431db4a..fb12f255d8 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18}; op_dialect_version_map_["BitwiseXor"] = {18}; 
op_dialect_version_map_["BlackmanWindow"] = {17}; op_dialect_version_map_["Cast"] = {19}; -op_dialect_version_map_["CastLike"] = {15}; +op_dialect_version_map_["CastLike"] = {19}; op_dialect_version_map_["CastMap"] = {1}; op_dialect_version_map_["CategoryMapper"] = {1}; op_dialect_version_map_["Ceil"] = {13}; @@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2}; op_dialect_version_map_["Pow"] = {15}; op_dialect_version_map_["QLinearConv"] = {10}; op_dialect_version_map_["QLinearMatMul"] = {10}; -op_dialect_version_map_["QuantizeLinear"] = {13}; +op_dialect_version_map_["QuantizeLinear"] = {19}; op_dialect_version_map_["RNN"] = {14}; op_dialect_version_map_["RandomNormal"] = {1}; op_dialect_version_map_["RandomNormalLike"] = {1}; diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 03cdae080c..445f88e318 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -898,9 +898,10 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike", the same data type as the elements of the second input tensor. See documentation of the Cast operator for further details. }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input, - AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type); - let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output); + let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input, + AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type, + DefaultValuedAttr:$saturate); + let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 2; @@ -5685,15 +5686,20 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", let description = [{ The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor. 
The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization. - The quantization formula is y = saturate ((x / y_scale) + y_zero_point). + The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`. For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type. - }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x, - TensorOf<[F32]>:$y_scale, - AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point, - DefaultValuedAttr:$axis); - let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y); + For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. + 'y_zero_point' and 'y' must have same type. + 'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz, + but the quantization formula remains the same for consistency and + the type of the attribute 'y_zero_point' still determines the quantization type. + }]; + let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x, + AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale, + AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point, + DefaultValuedAttr:$axis, + DefaultValuedAttr:$saturate); + let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 3; diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir index eb7196d54a..c84805fa35 100644 --- a/test/mlir/onnx/onnx_shape_inference.mlir +++ b/test/mlir/onnx/onnx_shape_inference.mlir @@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) -> "onnx.Return"(%1) : (tensor<*xf16>) -> () // CHECK-LABEL: test_castlike_1 - // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> + // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16> } @@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor) -> () // CHECK-LABEL: test_quantize_linear_1 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8> } @@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_2 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 
: si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } @@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_3 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext index 6b515dd8d9..8cd8468602 100644 --- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext +++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext @@ -24,10 +24,10 @@ myfun (x) => (y) { // CHECK-LABEL: func.func @main_graph // CHECK-SAME: ([[PARAM_0_:%.+]]: tensor) -> tensor attributes {input_names = ["x"], output_names = ["y"]} { // CHECK: [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor -// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor +// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_3_:%.+]] = onnx.Constant {value_float = 1.000000e+00 : f32} : tensor -// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor // CHECK: [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor, tensor) -> tensor // CHECK: [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor, tensor) -> tensor // CHECK: onnx.Return [[VAR_6_]] : tensor diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py index 4e50c6a724..a793508896 100755 --- a/utils/gen_onnx_mlir.py +++ b/utils/gen_onnx_mlir.py @@ -98,7 +98,7 @@ 'BitwiseXor': [18], 'BlackmanWindow': [17], 'Cast': [19], - 'CastLike': [15], + 'CastLike': [19], 'CastMap': [1], 'CategoryMapper': [1], 'Ceil': [13], @@ -206,7 +206,7 @@ 'Pow': [15], 'QLinearConv': [10], 'QLinearMatMul': [10], - 'QuantizeLinear': [13], + 'QuantizeLinear': [19], 'RNN': [14], 'RandomNormal': [1], 'RandomNormalLike': [1], From 653718ae4b94d6e3f3beff5ea30df8553a69e5b2 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 13:18:51 -0400 Subject: [PATCH 09/13] Revert "Add frontend support for saturate for CastLike and QuantizeLinear (#2480)" This reverts commit fe7da41d4cea779efc16c6fe4881d8e1fb154395. 
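For reference, the QuantizeLinear semantics being reverted here (and re-applied later in patch 12) follow the op description above: `y = saturate((x / y_scale) + y_zero_point)`, where `x / y_scale` rounds to nearest-even and saturation clamps to [0, 255] for uint8 or [-128, 127] for int8. A minimal element-wise sketch of that formula for the int8 case (illustrative only, not the compiler's actual lowering; the function name quantizeLinearInt8 is made up for this note):

  #include <algorithm>
  #include <cmath>
  #include <cstdint>

  // Sketch of ONNX QuantizeLinear for int8 output. std::nearbyint
  // rounds to nearest-even under the default floating-point
  // environment, matching the op's rounding rule.
  int8_t quantizeLinearInt8(float x, float yScale, int8_t yZeroPoint) {
    float v = std::nearbyint(x / yScale) + static_cast<float>(yZeroPoint);
    v = std::min(127.0f, std::max(-128.0f, v)); // saturate to int8 range
    return static_cast<int8_t>(v);
  }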
Signed-off-by: Megan Hampton --- src/Builder/OpBuildTable.inc | 4 +-- src/Dialect/ONNX/ONNXOps.td.inc | 28 ++++++++----------- test/mlir/onnx/onnx_shape_inference.mlir | 8 +++--- .../functiontest_attrwithdefault.onnxtext | 4 +-- utils/gen_onnx_mlir.py | 4 +-- 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index fb12f255d8..b04431db4a 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18}; op_dialect_version_map_["BitwiseXor"] = {18}; op_dialect_version_map_["BlackmanWindow"] = {17}; op_dialect_version_map_["Cast"] = {19}; -op_dialect_version_map_["CastLike"] = {19}; +op_dialect_version_map_["CastLike"] = {15}; op_dialect_version_map_["CastMap"] = {1}; op_dialect_version_map_["CategoryMapper"] = {1}; op_dialect_version_map_["Ceil"] = {13}; @@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2}; op_dialect_version_map_["Pow"] = {15}; op_dialect_version_map_["QLinearConv"] = {10}; op_dialect_version_map_["QLinearMatMul"] = {10}; -op_dialect_version_map_["QuantizeLinear"] = {19}; +op_dialect_version_map_["QuantizeLinear"] = {13}; op_dialect_version_map_["RNN"] = {14}; op_dialect_version_map_["RandomNormal"] = {1}; op_dialect_version_map_["RandomNormalLike"] = {1}; diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 445f88e318..03cdae080c 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -898,10 +898,9 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike", the same data type as the elements of the second input tensor. See documentation of the Cast operator for further details. }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input, - AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type, - DefaultValuedAttr:$saturate); - let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output); + let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input, + AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type); + let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, 
TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 2; @@ -5686,20 +5685,15 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", let description = [{ The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor. The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization. - The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`. + The quantization formula is y = saturate ((x / y_scale) + y_zero_point). For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. - 'y_zero_point' and 'y' must have same type. - 'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz, - but the quantization formula remains the same for consistency and - the type of the attribute 'y_zero_point' still determines the quantization type. - }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x, - AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale, - AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point, - DefaultValuedAttr:$axis, - DefaultValuedAttr:$saturate); - let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y); + For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type. 
+ }]; + let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x, + TensorOf<[F32]>:$y_scale, + AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point, + DefaultValuedAttr:$axis); + let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 3; diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir index c84805fa35..eb7196d54a 100644 --- a/test/mlir/onnx/onnx_shape_inference.mlir +++ b/test/mlir/onnx/onnx_shape_inference.mlir @@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) -> "onnx.Return"(%1) : (tensor<*xf16>) -> () // CHECK-LABEL: test_castlike_1 - // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> + // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16> } @@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor) -> () // CHECK-LABEL: test_quantize_linear_1 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8> } @@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_2 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } @@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_3 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext index 8cd8468602..6b515dd8d9 100644 --- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext +++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext @@ -24,10 +24,10 @@ myfun (x) => (y) { // CHECK-LABEL: func.func @main_graph // CHECK-SAME: ([[PARAM_0_:%.+]]: tensor) -> tensor attributes {input_names = ["x"], output_names = ["y"]} { // CHECK: [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor -// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor +// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_3_:%.+]] = onnx.Constant {value_float = 
1.000000e+00 : f32} : tensor -// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor // CHECK: [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor, tensor) -> tensor // CHECK: [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor, tensor) -> tensor // CHECK: onnx.Return [[VAR_6_]] : tensor diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py index a793508896..4e50c6a724 100755 --- a/utils/gen_onnx_mlir.py +++ b/utils/gen_onnx_mlir.py @@ -98,7 +98,7 @@ 'BitwiseXor': [18], 'BlackmanWindow': [17], 'Cast': [19], - 'CastLike': [19], + 'CastLike': [15], 'CastMap': [1], 'CategoryMapper': [1], 'Ceil': [13], @@ -206,7 +206,7 @@ 'Pow': [15], 'QLinearConv': [10], 'QLinearMatMul': [10], - 'QuantizeLinear': [19], + 'QuantizeLinear': [13], 'RNN': [14], 'RandomNormal': [1], 'RandomNormalLike': [1], From 61856c0f066d2c621c75066de14dcbf4e6812c0d Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 13:19:34 -0400 Subject: [PATCH 10/13] Revert "Do not remove LLVM "internal" options (#2475)" This reverts commit c9c56314b4097b0802a36bf10b7bc297b3314322. Signed-off-by: Megan Hampton --- src/Compiler/CMakeLists.txt | 6 +++++- src/Compiler/CompilerOptions.cpp | 8 +------- src/Compiler/CompilerUtils.cpp | 1 + src/Compiler/OnnxMlirCompiler.cpp | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Compiler/CMakeLists.txt b/src/Compiler/CMakeLists.txt index 1b086237be..33abd59397 100644 --- a/src/Compiler/CMakeLists.txt +++ b/src/Compiler/CMakeLists.txt @@ -152,7 +152,7 @@ add_onnx_mlir_library(OMCompilerUtils EXCLUDE_FROM_OM_LIBS DEPENDS - ExternalUtil + ExternalUtil llc opt @@ -163,10 +163,12 @@ add_onnx_mlir_library(OMCompilerUtils ${ONNX_MLIR_SRC_ROOT}/include LINK_LIBS PUBLIC + ${OMLibs} OMCompilerDialects OMCompilerPasses OMAccelerator OMVersion + MLIRIR # Link LLVM libraries necessary to query which target architectures # are configured. @@ -191,6 +193,7 @@ add_onnx_mlir_library(OMCompiler DEPENDS OMCompilerUtils + ExternalUtil INCLUDE_DIRS PRIVATE ${FILE_GENERATE_DIR} @@ -201,6 +204,7 @@ add_onnx_mlir_library(OMCompiler EXCLUDE_FROM_OM_LIBS LINK_LIBS PRIVATE + OMCompilerDialects OMCompilerUtils ) diff --git a/src/Compiler/CompilerOptions.cpp b/src/Compiler/CompilerOptions.cpp index 2d6e78964b..e0dfe37a0f 100644 --- a/src/Compiler/CompilerOptions.cpp +++ b/src/Compiler/CompilerOptions.cpp @@ -1010,13 +1010,7 @@ std::string getToolPath( // result in a unknown option error. void removeUnrelatedOptions( const std::vector Categories) { - // Do not remove LLVM "internal" options such as --debug - // that do not have a category (and therefore placed - // under the general category). So we add the general - // category to the list of not-really-hidden options. 
- std::vector optCategories(Categories); - optCategories.push_back(&llvm::cl::getGeneralCategory()); - llvm::cl::HideUnrelatedOptions(optCategories); + llvm::cl::HideUnrelatedOptions(Categories); llvm::StringMap &optMap = llvm::cl::getRegisteredOptions(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 284e717b35..44c9dbefa0 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -39,6 +39,7 @@ #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerPasses.hpp" #include "src/Compiler/HeapReporter.hpp" +#include "src/Dialect/ONNX/ONNXDialect.hpp" #include "src/Version/Version.hpp" #include diff --git a/src/Compiler/OnnxMlirCompiler.cpp b/src/Compiler/OnnxMlirCompiler.cpp index a1345e7c49..2cd345c214 100644 --- a/src/Compiler/OnnxMlirCompiler.cpp +++ b/src/Compiler/OnnxMlirCompiler.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "include/OnnxMlirCompiler.h" +#include "src/Compiler/CompilerDialects.hpp" #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerUtils.hpp" #include "llvm/Support/FileSystem.h" From 718335c9859d3d3faac165a6393605f1c01bdc14 Mon Sep 17 00:00:00 2001 From: gongsu832 Date: Tue, 5 Sep 2023 17:58:02 -0400 Subject: [PATCH 11/13] Do not remove LLVM "internal" options (#2475) * - Do not remove LLVM "internal" options such as --debug - Clean up some header includes and library link dependencies Signed-off-by: Gong Su * Clang format Signed-off-by: Gong Su * Fix general options not showing with --help-hidden|--help-list-hidden Signed-off-by: Gong Su --------- Signed-off-by: Gong Su Signed-off-by: Megan Hampton --- src/Compiler/CMakeLists.txt | 6 +----- src/Compiler/CompilerOptions.cpp | 8 +++++++- src/Compiler/CompilerUtils.cpp | 1 - src/Compiler/OnnxMlirCompiler.cpp | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Compiler/CMakeLists.txt b/src/Compiler/CMakeLists.txt index 33abd59397..1b086237be 100644 --- a/src/Compiler/CMakeLists.txt +++ b/src/Compiler/CMakeLists.txt @@ -152,7 +152,7 @@ add_onnx_mlir_library(OMCompilerUtils EXCLUDE_FROM_OM_LIBS DEPENDS - ExternalUtil + ExternalUtil llc opt @@ -163,12 +163,10 @@ add_onnx_mlir_library(OMCompilerUtils ${ONNX_MLIR_SRC_ROOT}/include LINK_LIBS PUBLIC - ${OMLibs} OMCompilerDialects OMCompilerPasses OMAccelerator OMVersion - MLIRIR # Link LLVM libraries necessary to query which target architectures # are configured. @@ -193,7 +191,6 @@ add_onnx_mlir_library(OMCompiler DEPENDS OMCompilerUtils - ExternalUtil INCLUDE_DIRS PRIVATE ${FILE_GENERATE_DIR} @@ -204,7 +201,6 @@ add_onnx_mlir_library(OMCompiler EXCLUDE_FROM_OM_LIBS LINK_LIBS PRIVATE - OMCompilerDialects OMCompilerUtils ) diff --git a/src/Compiler/CompilerOptions.cpp b/src/Compiler/CompilerOptions.cpp index e0dfe37a0f..2d6e78964b 100644 --- a/src/Compiler/CompilerOptions.cpp +++ b/src/Compiler/CompilerOptions.cpp @@ -1010,7 +1010,13 @@ std::string getToolPath( // result in a unknown option error. void removeUnrelatedOptions( const std::vector Categories) { - llvm::cl::HideUnrelatedOptions(Categories); + // Do not remove LLVM "internal" options such as --debug + // that do not have a category (and therefore placed + // under the general category). So we add the general + // category to the list of not-really-hidden options. 
+ std::vector optCategories(Categories); + optCategories.push_back(&llvm::cl::getGeneralCategory()); + llvm::cl::HideUnrelatedOptions(optCategories); llvm::StringMap &optMap = llvm::cl::getRegisteredOptions(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 44c9dbefa0..284e717b35 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -39,7 +39,6 @@ #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerPasses.hpp" #include "src/Compiler/HeapReporter.hpp" -#include "src/Dialect/ONNX/ONNXDialect.hpp" #include "src/Version/Version.hpp" #include diff --git a/src/Compiler/OnnxMlirCompiler.cpp b/src/Compiler/OnnxMlirCompiler.cpp index 2cd345c214..a1345e7c49 100644 --- a/src/Compiler/OnnxMlirCompiler.cpp +++ b/src/Compiler/OnnxMlirCompiler.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include "include/OnnxMlirCompiler.h" -#include "src/Compiler/CompilerDialects.hpp" #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerUtils.hpp" #include "llvm/Support/FileSystem.h" From 8097425bd0fcfdec264dadced6c52268a1bfc564 Mon Sep 17 00:00:00 2001 From: Philip Lassen Date: Tue, 5 Sep 2023 17:27:14 -0700 Subject: [PATCH 12/13] Add frontend support for saturate for CastLike and QuantizeLinear (#2480) Signed-off-by: philass Co-authored-by: Soren Lassen Signed-off-by: Megan Hampton --- src/Builder/OpBuildTable.inc | 4 +-- src/Dialect/ONNX/ONNXOps.td.inc | 28 +++++++++++-------- test/mlir/onnx/onnx_shape_inference.mlir | 8 +++--- .../functiontest_attrwithdefault.onnxtext | 4 +-- utils/gen_onnx_mlir.py | 4 +-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index b04431db4a..fb12f255d8 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18}; op_dialect_version_map_["BitwiseXor"] = {18}; op_dialect_version_map_["BlackmanWindow"] = {17}; op_dialect_version_map_["Cast"] = {19}; -op_dialect_version_map_["CastLike"] = {15}; +op_dialect_version_map_["CastLike"] = {19}; op_dialect_version_map_["CastMap"] = {1}; op_dialect_version_map_["CategoryMapper"] = {1}; op_dialect_version_map_["Ceil"] = {13}; @@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2}; op_dialect_version_map_["Pow"] = {15}; op_dialect_version_map_["QLinearConv"] = {10}; op_dialect_version_map_["QLinearMatMul"] = {10}; -op_dialect_version_map_["QuantizeLinear"] = {13}; +op_dialect_version_map_["QuantizeLinear"] = {19}; op_dialect_version_map_["RNN"] = {14}; op_dialect_version_map_["RandomNormal"] = {1}; op_dialect_version_map_["RandomNormalLike"] = {1}; diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 03cdae080c..445f88e318 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -898,9 +898,10 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike", the same data type as the elements of the second input tensor. See documentation of the Cast operator for further details. 
}]; - let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input, - AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type); - let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output); + let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input, + AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type, + DefaultValuedAttr:$saturate); + let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 2; @@ -5685,15 +5686,20 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", let description = [{ The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor. The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization. - The quantization formula is y = saturate ((x / y_scale) + y_zero_point). + The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`. For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type. - }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x, - TensorOf<[F32]>:$y_scale, - AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point, - DefaultValuedAttr:$axis); - let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y); + For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. + 'y_zero_point' and 'y' must have same type. 
+  'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz,
+  but the quantization formula remains the same for consistency and
+  the type of the attribute 'y_zero_point' still determines the quantization type.
+  }];
+  let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x,
+    AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale,
+    AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point,
+    DefaultValuedAttr<SI64Attr, "1">:$axis,
+    DefaultValuedAttr<SI64Attr, "1">:$saturate);
+  let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y);
   let extraClassDeclaration = [{
     static int getNumberOfOperands() {
       return 3;
diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir
index eb7196d54a..c84805fa35 100644
--- a/test/mlir/onnx/onnx_shape_inference.mlir
+++ b/test/mlir/onnx/onnx_shape_inference.mlir
@@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) -> tensor<*xf16> {
   "onnx.Return"(%1) : (tensor<*xf16>) -> ()
 
   // CHECK-LABEL: test_castlike_1
-  // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16>
+  // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16>
   // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16>
 }
 
@@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor<f32>, %arg2 : tensor<i8>) -> tensor<*xi8> {
   "onnx.Return"(%0) : (tensor<*xi8>) -> ()
 
   // CHECK-LABEL: test_quantize_linear_1
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<i8>) -> tensor<5x2x3x4xi8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<i8>) -> tensor<5x2x3x4xi8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8>
 }
 
@@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor<f32>, %arg2: tensor<ui8>) -> tensor<*xui8> {
   "onnx.Return"(%0) : (tensor<*xui8>) -> ()
 
   // CHECK-LABEL: test_quantize_linear_2
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<5x2x3x4xui8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<5x2x3x4xui8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8>
 }
 
@@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor<f32>) -> tensor<*xui8> {
   "onnx.Return"(%1) : (tensor<*xui8>) -> ()
 
   // CHECK-LABEL: test_quantize_linear_3
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, none) -> tensor<5x2x3x4xui8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, none) -> tensor<5x2x3x4xui8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8>
 }
 
diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
index 6b515dd8d9..8cd8468602 100644
--- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
+++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
@@ -24,10 +24,10 @@ myfun (x) => (y) {
 // CHECK-LABEL:  func.func @main_graph
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<f32>) -> tensor<f32> attributes {input_names = ["x"], output_names = ["y"]} {
 // CHECK:           [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor<f32>
-// CHECK:           [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+// CHECK:           [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = onnx.Constant {value_float = 1.000000e+00 : f32} : tensor<f32>
-// CHECK:           [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+// CHECK:           [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           onnx.Return [[VAR_6_]] : tensor<f32>
diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py
index 4e50c6a724..a793508896 100755
--- a/utils/gen_onnx_mlir.py
+++ b/utils/gen_onnx_mlir.py
@@ -98,7 +98,7 @@
     'BitwiseXor': [18],
     'BlackmanWindow': [17],
     'Cast': [19],
-    'CastLike': [15],
+    'CastLike': [19],
     'CastMap': [1],
     'CategoryMapper': [1],
     'Ceil': [13],
@@ -206,7 +206,7 @@
     'Pow': [15],
     'QLinearConv': [10],
     'QLinearMatMul': [10],
-    'QuantizeLinear': [13],
+    'QuantizeLinear': [19],
     'RNN': [14],
     'RandomNormal': [1],
     'RandomNormalLike': [1],

From 13cbadaf5ca8c94c46e4097432b22b252d6a7264 Mon Sep 17 00:00:00 2001
From: Megan Hampton
Date: Wed, 6 Sep 2023 14:19:01 -0400
Subject: [PATCH 13/13] Add data type for python script

Signed-off-by: Megan Hampton
---
 test/backend/inference_backend.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/backend/inference_backend.py b/test/backend/inference_backend.py
index 9e58bd08c4..4a4e57e8fc 100644
--- a/test/backend/inference_backend.py
+++ b/test/backend/inference_backend.py
@@ -1375,6 +1375,7 @@ def JniExecutionSession(jar_name, inputs):
         "f2": np.float16,
         "f4": np.float32,
         "f8": np.float64,
+        "U25": np.str_,
     }
 
     # print('stdout=' + str(procStdout), file=sys.stderr)
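
For readers following patch 12: the ONNX opset-19 semantics quoted in the
QuantizeLinear description above can be sketched in a few lines of NumPy.
This is an illustration of the spec text only, not onnx-mlir's runtime
kernel; the helper name quantize_linear is invented for the example, and
the float8 cases are omitted.

    import numpy as np

    def quantize_linear(x, y_scale, y_zero_point=0, dtype=np.uint8):
        # y = saturate(round_half_to_even(x / y_scale) + y_zero_point).
        # np.rint rounds ties to the nearest even integer, as the spec requires.
        y = np.rint(x / y_scale).astype(np.int32) + np.int32(y_zero_point)
        # Saturate: [0, 255] for uint8, [-128, 127] for int8.
        info = np.iinfo(dtype)
        return np.clip(y, info.min, info.max).astype(dtype)

    x = np.array([-1.0, 0.25, 126.6], dtype=np.float32)
    print(quantize_linear(x, y_scale=np.float32(0.5), y_zero_point=1))
    # [  0   1 254]

With saturate = 1 (the default recorded by the new DefaultValuedAttr), an
out-of-range result clamps to the target type's range; for the float8
output types the attribute chooses between saturating to the largest
finite value and letting out-of-range values become NaN or infinity.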
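
A note on the "U25" key that patch 13 adds: it is a NumPy dtype string,
where kind "U" means fixed-width Unicode and the digits give the field
width in characters, so "U25" matches string tensors whose elements hold
up to 25 characters. A minimal sketch of how NumPy forms these strings
(illustrative only; the diff does not show why the harness sees width 25
specifically):

    import numpy as np

    a = np.array(["castlike", "quantize"])  # width inferred from longest element
    print(a.dtype)        # <U8 on a little-endian machine
    print(a.dtype.kind)   # 'U' -> maps onto np.str_ in the table above
    print(np.dtype("U25").itemsize)  # 100: 25 characters x 4 bytes each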