From 1b2987b38f44069c73bec3f2e5b7e27f9972dace Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Tue, 5 Sep 2023 14:01:18 -0400 Subject: [PATCH 01/13] Add support for string data type Signed-off-by: Megan Hampton --- src/Runtime/OMUnique.inc | 3 ++- src/Runtime/PyExecutionSessionBase.cpp | 3 ++- src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Runtime/OMUnique.inc b/src/Runtime/OMUnique.inc index 89f7952993..cdef189f99 100644 --- a/src/Runtime/OMUnique.inc +++ b/src/Runtime/OMUnique.inc @@ -94,7 +94,8 @@ int isLessNum(void *arg1, void *arg2, OM_DATA_TYPE dataType) { return *((int32_t *)arg1) < *((int32_t *)arg2); case ONNX_TYPE_INT64: return *((int64_t *)arg1) < *((int64_t *)arg2); - // case ONNX_TYPE_STRING: + case ONNX_TYPE_STRING: + return *((const char **)arg1) < *((const char **)arg2); case ONNX_TYPE_BOOL: return *((bool *)arg1) < *((bool *)arg2); // case ONNX_TYPE_FLOAT16: diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 4cd1d62a0d..7db2a6947a 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,7 +105,8 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - // string type missing + else if (py::isinstance>(inputPyArray)) + dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; else if (py::isinstance>(inputPyArray)) diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index f671f20d2b..b189555ae2 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -95,6 +95,7 @@ private static class Data { put("f2", OMTensor.ONNX_TYPE_FLOAT16); put("f4", OMTensor.ONNX_TYPE_FLOAT); put("f8", OMTensor.ONNX_TYPE_DOUBLE); + put("S", OMTensor.ONNX_TYPE_STRING); }}; private static final HashMap onnx2numpyType = @@ -111,6 +112,9 @@ private static class Data { put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian+"f2"); put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian+"f4"); put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian+"f8"); + // numpy documentation: datatype S is zero-terminated bytes (not recommended) + // https://numpy.org/doc/stable/reference/arrays.dtypes.html + put(OMTensor.ONNX_TYPE_STRING, "|S"); }}; private static OMTensor createTensor(String buffer, long[] shape, String dtype) { From 7acc6f5bfc5ffb593808155aa3963fcf6bda2a4e Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Tue, 5 Sep 2023 14:08:28 -0400 Subject: [PATCH 02/13] Fix format Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 7db2a6947a..f6c72d5fae 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,7 +105,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - else if (py::isinstance>(inputPyArray)) + else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; From 563d95f4bab0b3222a9251a357615d4f1d526c57 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Tue, 5 Sep 2023 14:11:47 -0400 Subject: [PATCH 03/13] Linter Signed-off-by: Megan Hampton --- 
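Note on the ONNX_TYPE_STRING case added to isLessNum() in patch 01: comparing two `const char *` values with `<` orders the elements by pointer address, not by string contents, so the result depends on where the strings happen to live in memory. A minimal sketch of a content-based comparison, assuming each string element is a NUL-terminated C string (the helper name isLessStr is hypothetical, not part of the runtime):

  #include <cstring>

  // Sketch: lexicographic ordering for ONNX_TYPE_STRING elements.
  // strcmp compares character contents; comparing the pointers
  // themselves would only compare addresses.
  static int isLessStr(void *arg1, void *arg2) {
    const char *s1 = *(const char **)arg1;
    const char *s2 = *(const char **)arg2;
    return strcmp(s1, s2) < 0;
  }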
.../jni/src/com/ibm/onnxmlir/OMRunner.java | 402 +++++++++--------- 1 file changed, 206 insertions(+), 196 deletions(-) diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index b189555ae2..bdc70dc7ec 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -41,211 +41,221 @@ * output tensors converted from Java HashMap. The tensor * data are base64 encoded. */ -public class OMRunner -{ - private static class Data { - /* For Jsoniter, field names match the keys in the JSON */ - String buffer; - String dtype; - long[] shape; - - /* For Jackson, words after removing get/set of getter/setter - * methods with the first letter lowercased match the keys in - * the JSON. - */ - /* - String getBuffer() { return this.buffer; } - void setBuffer(String buffer) { this.buffer = buffer; } - - String getDtype() { return this.dtype; } - void setDtype(String dtype) { this.dtype = dtype; } - - long[] getShape() { return this.shape; } - void setShape(long[] shape) { this.shape = shape; } - */ - } - - private static final HashMap numpy2javaEndian = - new HashMap() {{ - put(">", ByteOrder.BIG_ENDIAN); - put("<", ByteOrder.LITTLE_ENDIAN); - put("=", ByteOrder.nativeOrder()); - put("|", ByteOrder.nativeOrder()); - }}; - - private static final HashMap java2numpyEndian = - new HashMap() {{ - put(ByteOrder.BIG_ENDIAN, ">");; - put(ByteOrder.LITTLE_ENDIAN, "<");; - }}; - private static final String numpyEndian = - java2numpyEndian.get(ByteOrder.nativeOrder()); - - private static final HashMap numpy2onnxType = - new HashMap() {{ - put("b1", OMTensor.ONNX_TYPE_BOOL); - put("i1", OMTensor.ONNX_TYPE_INT8); - put("u1", OMTensor.ONNX_TYPE_UINT8); - put("i2", OMTensor.ONNX_TYPE_INT16); - put("u2", OMTensor.ONNX_TYPE_UINT16); - put("i4", OMTensor.ONNX_TYPE_INT32); - put("u4", OMTensor.ONNX_TYPE_UINT32); - put("i8", OMTensor.ONNX_TYPE_INT64); - put("u8", OMTensor.ONNX_TYPE_UINT64); - put("f2", OMTensor.ONNX_TYPE_FLOAT16); - put("f4", OMTensor.ONNX_TYPE_FLOAT); - put("f8", OMTensor.ONNX_TYPE_DOUBLE); - put("S", OMTensor.ONNX_TYPE_STRING); - }}; - - private static final HashMap onnx2numpyType = - new HashMap() {{ - put(OMTensor.ONNX_TYPE_BOOL, "|b1"); - put(OMTensor.ONNX_TYPE_INT8, "|i1"); - put(OMTensor.ONNX_TYPE_UINT8, "|u1"); - put(OMTensor.ONNX_TYPE_INT16, numpyEndian+"i2"); - put(OMTensor.ONNX_TYPE_UINT16, numpyEndian+"u2"); - put(OMTensor.ONNX_TYPE_INT32, numpyEndian+"i4"); - put(OMTensor.ONNX_TYPE_UINT32, numpyEndian+"u4"); - put(OMTensor.ONNX_TYPE_INT64, numpyEndian+"i8"); - put(OMTensor.ONNX_TYPE_UINT64, numpyEndian+"u8"); - put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian+"f2"); - put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian+"f4"); - put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian+"f8"); - // numpy documentation: datatype S is zero-terminated bytes (not recommended) - // https://numpy.org/doc/stable/reference/arrays.dtypes.html - put(OMTensor.ONNX_TYPE_STRING, "|S"); - }}; - - private static OMTensor createTensor(String buffer, long[] shape, String dtype) { - /* We need a ByteBuffer for OMTensor but ByteBuffer.wrap(bytes) - * does NOT work. Because wrap simply creates a "view" of the - * byte[] as ByteBuffer. The backing byte[] is a Java object - * but the JNI wrapper is expecting a real direct ByteBuffer - * to hold the data to be given to the native code. 
- */ - byte[] bytes = Base64.getDecoder().decode(buffer); - ByteBuffer data = ByteBuffer.allocateDirect(bytes.length); - data.put(bytes); +public class OMRunner { + private static class Data { + /* For Jsoniter, field names match the keys in the JSON */ + String buffer; + String dtype; + long[] shape; + + /* + * For Jackson, words after removing get/set of getter/setter + * methods with the first letter lowercased match the keys in + * the JSON. + */ + /* + * String getBuffer() { return this.buffer; } + * void setBuffer(String buffer) { this.buffer = buffer; } + * + * String getDtype() { return this.dtype; } + * void setDtype(String dtype) { this.dtype = dtype; } + * + * long[] getShape() { return this.shape; } + * void setShape(long[] shape) { this.shape = shape; } + */ + } - String e = dtype.substring(0, 1); - ByteOrder endian = numpy2javaEndian.get(e); + private static final HashMap numpy2javaEndian = new HashMap() { + { + put(">", ByteOrder.BIG_ENDIAN); + put("<", ByteOrder.LITTLE_ENDIAN); + put("=", ByteOrder.nativeOrder()); + put("|", ByteOrder.nativeOrder()); + } + }; - String t = dtype.substring(1); - Integer otype = numpy2onnxType.get(t); + private static final HashMap java2numpyEndian = new HashMap() { + { + put(ByteOrder.BIG_ENDIAN, ">"); + ; + put(ByteOrder.LITTLE_ENDIAN, "<"); + ; + } + }; + private static final String numpyEndian = java2numpyEndian.get(ByteOrder.nativeOrder()); - return new OMTensor(data, shape, endian, - otype == null ? -1 : otype.intValue()); - } + private static final HashMap numpy2onnxType = new HashMap() { + { + put("b1", OMTensor.ONNX_TYPE_BOOL); + put("i1", OMTensor.ONNX_TYPE_INT8); + put("u1", OMTensor.ONNX_TYPE_UINT8); + put("i2", OMTensor.ONNX_TYPE_INT16); + put("u2", OMTensor.ONNX_TYPE_UINT16); + put("i4", OMTensor.ONNX_TYPE_INT32); + put("u4", OMTensor.ONNX_TYPE_UINT32); + put("i8", OMTensor.ONNX_TYPE_INT64); + put("u8", OMTensor.ONNX_TYPE_UINT64); + put("f2", OMTensor.ONNX_TYPE_FLOAT16); + put("f4", OMTensor.ONNX_TYPE_FLOAT); + put("f8", OMTensor.ONNX_TYPE_DOUBLE); + put("S", OMTensor.ONNX_TYPE_STRING); + } + }; - private static HashMap encodeTensor(OMTensor omt) throws Exception { - /* We need a byte[] for base64 encode but buffer.array() - * does NOT work. Because the buffer is backed by JNI code - * generated array, not a Java byte[] object. So base64 - * encode results in UnsupportedOperationException. - */ - ByteBuffer buffer = omt.getData(); - byte[] bytes = new byte[buffer.limit()]; - buffer.get(bytes); - - String dtype = onnx2numpyType.get(omt.getDataType()); - - HashMap map = new HashMap(); - map.put("buffer", Base64.getEncoder().encodeToString(bytes/*buffer.array()*/)); - map.put("dtype", dtype); - map.put("shape", omt.getShape()); - return map; - } - - /* Model inputs are read from stdin encoded in JSON. 
This routine will - * - * - read JSON from stdin - * - decode JSON array with Jsoniter - * - call createTensor to create an OMTensor from each object - * - construct the OMTensorList to be fed into mainGraph - */ - private static OMTensorList readStdin() throws Exception { - BufferedReader stdin = - new BufferedReader(new InputStreamReader(System.in)); - ArrayList omtl = new ArrayList(); - - JsonIterator json = JsonIterator.parse(stdin.readLine()); - int count = 0; - while(json.readArray()) { - Data data = json.read(Data.class); - OMTensor omt = createTensor(data.buffer, data.shape, data.dtype); - omtl.add(omt); - count++; + private static final HashMap onnx2numpyType = new HashMap() { + { + put(OMTensor.ONNX_TYPE_BOOL, "|b1"); + put(OMTensor.ONNX_TYPE_INT8, "|i1"); + put(OMTensor.ONNX_TYPE_UINT8, "|u1"); + put(OMTensor.ONNX_TYPE_INT16, numpyEndian + "i2"); + put(OMTensor.ONNX_TYPE_UINT16, numpyEndian + "u2"); + put(OMTensor.ONNX_TYPE_INT32, numpyEndian + "i4"); + put(OMTensor.ONNX_TYPE_UINT32, numpyEndian + "u4"); + put(OMTensor.ONNX_TYPE_INT64, numpyEndian + "i8"); + put(OMTensor.ONNX_TYPE_UINT64, numpyEndian + "u8"); + put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian + "f2"); + put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian + "f4"); + put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian + "f8"); + // numpy documentation: datatype S is zero-terminated bytes (not recommended) + // https://numpy.org/doc/stable/reference/arrays.dtypes.html + put(OMTensor.ONNX_TYPE_STRING, "|S"); + } + }; + + private static OMTensor createTensor(String buffer, long[] shape, String dtype) { + /* + * We need a ByteBuffer for OMTensor but ByteBuffer.wrap(bytes) + * does NOT work. Because wrap simply creates a "view" of the + * byte[] as ByteBuffer. The backing byte[] is a Java object + * but the JNI wrapper is expecting a real direct ByteBuffer + * to hold the data to be given to the native code. + */ + byte[] bytes = Base64.getDecoder().decode(buffer); + ByteBuffer data = ByteBuffer.allocateDirect(bytes.length); + data.put(bytes); + + String e = dtype.substring(0, 1); + ByteOrder endian = numpy2javaEndian.get(e); + + String t = dtype.substring(1); + Integer otype = numpy2onnxType.get(t); + + return new OMTensor(data, shape, endian, + otype == null ? -1 : otype.intValue()); } - OMTensor[] omts = new OMTensor[count]; - return new OMTensorList(omtl.toArray(omts)); - } - - /* Model inputs are read from stdin encoded in JSON. This routine will - * - * - read JSON from stdin - * - decode JSON array with Jackson - * - call createTensor to create an OMTensor from each object - * - construct the OMTensorList to be fed into mainGraph - */ - /* - private static OMTensorList readStdin2() throws Exception { - ObjectMapper om = new ObjectMapper(); - Data[] data = om.readValue(System.in, Data[].class); - OMTensor[] omts = new OMTensor[data.length]; - for (int i = 0; i < data.length; i++) { - omts[i] = createTensor(data[i].buffer, data[i].shape, data[i].dtype); + + private static HashMap encodeTensor(OMTensor omt) throws Exception { + /* + * We need a byte[] for base64 encode but buffer.array() + * does NOT work. Because the buffer is backed by JNI code + * generated array, not a Java byte[] object. So base64 + * encode results in UnsupportedOperationException. 
+ */ + ByteBuffer buffer = omt.getData(); + byte[] bytes = new byte[buffer.limit()]; + buffer.get(bytes); + + String dtype = onnx2numpyType.get(omt.getDataType()); + + HashMap map = new HashMap(); + map.put("buffer", Base64.getEncoder().encodeToString(bytes/* buffer.array() */)); + map.put("dtype", dtype); + map.put("shape", omt.getShape()); + return map; } - return new OMTensorList(omts); - } - */ - - /* Model outputs are written to stdout encoded in JSON. This routine will - * - * - loop through tensors in the OMTensorList returned from mainGraph - * - call encodeTensor to create a list of HashMap from each OMTensor - * - encode HashMap list into JSON with Jsoniter - * - write JSON to stdout - */ - private static void writeStdout(OMTensorList output) throws Exception { - ArrayList> list = new ArrayList>(); - HashMap map = new HashMap(); - OMTensor[] omts = output.getOmtArray(); - - for (int i = 0; i < omts.length; i++) { - list.add(encodeTensor(omts[i])); + + /* + * Model inputs are read from stdin encoded in JSON. This routine will + * + * - read JSON from stdin + * - decode JSON array with Jsoniter + * - call createTensor to create an OMTensor from each object + * - construct the OMTensorList to be fed into mainGraph + */ + private static OMTensorList readStdin() throws Exception { + BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in)); + ArrayList omtl = new ArrayList(); + + JsonIterator json = JsonIterator.parse(stdin.readLine()); + int count = 0; + while (json.readArray()) { + Data data = json.read(Data.class); + OMTensor omt = createTensor(data.buffer, data.shape, data.dtype); + omtl.add(omt); + count++; + } + OMTensor[] omts = new OMTensor[count]; + return new OMTensorList(omtl.toArray(omts)); } - BufferedWriter stdout = - new BufferedWriter(new OutputStreamWriter(System.out)); - stdout.write(JsonStream.serialize(list)); - stdout.flush(); - } - - /* Model outputs are written to stdout encoded in JSON. This routine will - * - * - loop through tensors in the OMTensorList returned from mainGraph - * - call encodeTensor to create a list of HashMap from each OMTensor - * - encode HashMap list into JSON with Jackson - * - write JSON to stdout - */ - /* - private static void writeStdout2(OMTensorList output) throws Exception { - ArrayList> list = new ArrayList>(); - HashMap map = new HashMap(); - OMTensor[] omts = output.getOmtArray(); - - for (int i = 0; i < omts.length; i++) { - list.add(encodeTensor(omts[i])); + /* + * Model inputs are read from stdin encoded in JSON. This routine will + * + * - read JSON from stdin + * - decode JSON array with Jackson + * - call createTensor to create an OMTensor from each object + * - construct the OMTensorList to be fed into mainGraph + */ + /* + * private static OMTensorList readStdin2() throws Exception { + * ObjectMapper om = new ObjectMapper(); + * Data[] data = om.readValue(System.in, Data[].class); + * OMTensor[] omts = new OMTensor[data.length]; + * for (int i = 0; i < data.length; i++) { + * omts[i] = createTensor(data[i].buffer, data[i].shape, data[i].dtype); + * } + * return new OMTensorList(omts); + * } + */ + + /* + * Model outputs are written to stdout encoded in JSON. 
This routine will + * + * - loop through tensors in the OMTensorList returned from mainGraph + * - call encodeTensor to create a list of HashMap from each OMTensor + * - encode HashMap list into JSON with Jsoniter + * - write JSON to stdout + */ + private static void writeStdout(OMTensorList output) throws Exception { + ArrayList> list = new ArrayList>(); + HashMap map = new HashMap(); + OMTensor[] omts = output.getOmtArray(); + + for (int i = 0; i < omts.length; i++) { + list.add(encodeTensor(omts[i])); + } + + BufferedWriter stdout = new BufferedWriter(new OutputStreamWriter(System.out)); + stdout.write(JsonStream.serialize(list)); + stdout.flush(); } - ObjectMapper om = new ObjectMapper(); - om.writeValue(System.out, list); - } - */ + /* + * Model outputs are written to stdout encoded in JSON. This routine will + * + * - loop through tensors in the OMTensorList returned from mainGraph + * - call encodeTensor to create a list of HashMap from each OMTensor + * - encode HashMap list into JSON with Jackson + * - write JSON to stdout + */ + /* + * private static void writeStdout2(OMTensorList output) throws Exception { + * ArrayList> list = new ArrayList>(); + * HashMap map = new HashMap(); + * OMTensor[] omts = output.getOmtArray(); + * + * for (int i = 0; i < omts.length; i++) { + * list.add(encodeTensor(omts[i])); + * } + * + * ObjectMapper om = new ObjectMapper(); + * om.writeValue(System.out, list); + * } + */ - /* Read inputs from stdin, call mainGraph, write outputs to stdout */ - public static void main(String[] args) throws Exception { - writeStdout(OMModel.mainGraph(readStdin())); - } + /* Read inputs from stdin, call mainGraph, write outputs to stdout */ + public static void main(String[] args) throws Exception { + writeStdout(OMModel.mainGraph(readStdin())); + } } From f7e34614520be372ebb6ed4546a615cc678e26e9 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 10:18:02 -0400 Subject: [PATCH 04/13] Change to unicode string Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 2 +- src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index f6c72d5fae..536b0bec81 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -187,7 +187,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = py::dtype("int64"); break; case (OM_DATA_TYPE)onnx::TensorProto::STRING: - dtype = py::dtype("str"); + dtype = py::dtype("str_"); break; case (OM_DATA_TYPE)onnx::TensorProto::BOOL: dtype = py::dtype("bool_"); diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index bdc70dc7ec..de7a5df009 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -98,7 +98,7 @@ private static class Data { put("f2", OMTensor.ONNX_TYPE_FLOAT16); put("f4", OMTensor.ONNX_TYPE_FLOAT); put("f8", OMTensor.ONNX_TYPE_DOUBLE); - put("S", OMTensor.ONNX_TYPE_STRING); + put("U25", OMTensor.ONNX_TYPE_STRING); } }; @@ -116,9 +116,9 @@ private static class Data { put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian + "f2"); put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian + "f4"); put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian + "f8"); - // numpy documentation: datatype S is zero-terminated bytes (not recommended) + // Unicode string: 25-character string // 
https://numpy.org/doc/stable/reference/arrays.dtypes.html - put(OMTensor.ONNX_TYPE_STRING, "|S"); + put(OMTensor.ONNX_TYPE_STRING, "|U25"); } }; From 9166c2878faa117195021e71e3c63f2fefba921b Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 12:16:23 -0400 Subject: [PATCH 05/13] Keep trying Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 4 ++-- src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 536b0bec81..3ef6e60013 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,7 +105,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - else if (py::isinstance>(inputPyArray)) + else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; @@ -187,7 +187,7 @@ std::vector PyExecutionSessionBase::pyRun( dtype = py::dtype("int64"); break; case (OM_DATA_TYPE)onnx::TensorProto::STRING: - dtype = py::dtype("str_"); + dtype = py::dtype("str"); break; case (OM_DATA_TYPE)onnx::TensorProto::BOOL: dtype = py::dtype("bool_"); diff --git a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java index de7a5df009..707ad274c1 100644 --- a/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java +++ b/src/Runtime/jni/src/com/ibm/onnxmlir/OMRunner.java @@ -116,7 +116,7 @@ private static class Data { put(OMTensor.ONNX_TYPE_FLOAT16, numpyEndian + "f2"); put(OMTensor.ONNX_TYPE_FLOAT, numpyEndian + "f4"); put(OMTensor.ONNX_TYPE_DOUBLE, numpyEndian + "f8"); - // Unicode string: 25-character string + // numpy documentation: Unicode string // https://numpy.org/doc/stable/reference/arrays.dtypes.html put(OMTensor.ONNX_TYPE_STRING, "|U25"); } From c9b48c433596adb316f849b09c362dee0890f2a2 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 12:44:29 -0400 Subject: [PATCH 06/13] Test Signed-off-by: Megan Hampton --- src/Runtime/PyExecutionSessionBase.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Runtime/PyExecutionSessionBase.cpp b/src/Runtime/PyExecutionSessionBase.cpp index 3ef6e60013..1895a5ede6 100644 --- a/src/Runtime/PyExecutionSessionBase.cpp +++ b/src/Runtime/PyExecutionSessionBase.cpp @@ -105,8 +105,8 @@ std::vector PyExecutionSessionBase::pyRun( dtype = ONNX_TYPE_INT32; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_INT64; - else if (py::isinstance>(inputPyArray)) - dtype = ONNX_TYPE_STRING; + // else if (py::isinstance>(inputPyArray)) + // dtype = ONNX_TYPE_STRING; else if (py::isinstance>(inputPyArray)) dtype = ONNX_TYPE_BOOL; else if (py::isinstance>(inputPyArray)) From ebd81ba3477abbe854bf51b19527fa813a5a9911 Mon Sep 17 00:00:00 2001 From: gongsu832 Date: Tue, 5 Sep 2023 17:58:02 -0400 Subject: [PATCH 07/13] Do not remove LLVM "internal" options (#2475) * - Do not remove LLVM "internal" options such as --debug - Clean up some header includes and library link dependencies Signed-off-by: Gong Su * Clang format Signed-off-by: Gong Su * Fix general options not showing with --help-hidden|--help-list-hidden Signed-off-by: Gong Su --------- Signed-off-by: Gong Su Signed-off-by: Megan Hampton --- src/Compiler/CMakeLists.txt | 6 +----- src/Compiler/CompilerOptions.cpp | 8 +++++++- src/Compiler/CompilerUtils.cpp | 1 - 
src/Compiler/OnnxMlirCompiler.cpp | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Compiler/CMakeLists.txt b/src/Compiler/CMakeLists.txt index 33abd59397..1b086237be 100644 --- a/src/Compiler/CMakeLists.txt +++ b/src/Compiler/CMakeLists.txt @@ -152,7 +152,7 @@ add_onnx_mlir_library(OMCompilerUtils EXCLUDE_FROM_OM_LIBS DEPENDS - ExternalUtil + ExternalUtil llc opt @@ -163,12 +163,10 @@ add_onnx_mlir_library(OMCompilerUtils ${ONNX_MLIR_SRC_ROOT}/include LINK_LIBS PUBLIC - ${OMLibs} OMCompilerDialects OMCompilerPasses OMAccelerator OMVersion - MLIRIR # Link LLVM libraries necessary to query which target architectures # are configured. @@ -193,7 +191,6 @@ add_onnx_mlir_library(OMCompiler DEPENDS OMCompilerUtils - ExternalUtil INCLUDE_DIRS PRIVATE ${FILE_GENERATE_DIR} @@ -204,7 +201,6 @@ add_onnx_mlir_library(OMCompiler EXCLUDE_FROM_OM_LIBS LINK_LIBS PRIVATE - OMCompilerDialects OMCompilerUtils ) diff --git a/src/Compiler/CompilerOptions.cpp b/src/Compiler/CompilerOptions.cpp index e0dfe37a0f..2d6e78964b 100644 --- a/src/Compiler/CompilerOptions.cpp +++ b/src/Compiler/CompilerOptions.cpp @@ -1010,7 +1010,13 @@ std::string getToolPath( // result in a unknown option error. void removeUnrelatedOptions( const std::vector Categories) { - llvm::cl::HideUnrelatedOptions(Categories); + // Do not remove LLVM "internal" options such as --debug + // that do not have a category (and therefore placed + // under the general category). So we add the general + // category to the list of not-really-hidden options. + std::vector optCategories(Categories); + optCategories.push_back(&llvm::cl::getGeneralCategory()); + llvm::cl::HideUnrelatedOptions(optCategories); llvm::StringMap &optMap = llvm::cl::getRegisteredOptions(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 44c9dbefa0..284e717b35 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -39,7 +39,6 @@ #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerPasses.hpp" #include "src/Compiler/HeapReporter.hpp" -#include "src/Dialect/ONNX/ONNXDialect.hpp" #include "src/Version/Version.hpp" #include diff --git a/src/Compiler/OnnxMlirCompiler.cpp b/src/Compiler/OnnxMlirCompiler.cpp index 2cd345c214..a1345e7c49 100644 --- a/src/Compiler/OnnxMlirCompiler.cpp +++ b/src/Compiler/OnnxMlirCompiler.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include "include/OnnxMlirCompiler.h" -#include "src/Compiler/CompilerDialects.hpp" #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerUtils.hpp" #include "llvm/Support/FileSystem.h" From a05646b0ca7eae64c7743b1cac6201eec0c8ab47 Mon Sep 17 00:00:00 2001 From: Philip Lassen Date: Tue, 5 Sep 2023 17:27:14 -0700 Subject: [PATCH 08/13] Add frontend support for saturate for CastLike and QuantizeLinear (#2480) Signed-off-by: philass Co-authored-by: Soren Lassen Signed-off-by: Megan Hampton --- src/Builder/OpBuildTable.inc | 4 +-- src/Dialect/ONNX/ONNXOps.td.inc | 28 +++++++++++-------- test/mlir/onnx/onnx_shape_inference.mlir | 8 +++--- .../functiontest_attrwithdefault.onnxtext | 4 +-- utils/gen_onnx_mlir.py | 4 +-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index b04431db4a..fb12f255d8 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18}; op_dialect_version_map_["BitwiseXor"] = {18}; 
op_dialect_version_map_["BlackmanWindow"] = {17}; op_dialect_version_map_["Cast"] = {19}; -op_dialect_version_map_["CastLike"] = {15}; +op_dialect_version_map_["CastLike"] = {19}; op_dialect_version_map_["CastMap"] = {1}; op_dialect_version_map_["CategoryMapper"] = {1}; op_dialect_version_map_["Ceil"] = {13}; @@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2}; op_dialect_version_map_["Pow"] = {15}; op_dialect_version_map_["QLinearConv"] = {10}; op_dialect_version_map_["QLinearMatMul"] = {10}; -op_dialect_version_map_["QuantizeLinear"] = {13}; +op_dialect_version_map_["QuantizeLinear"] = {19}; op_dialect_version_map_["RNN"] = {14}; op_dialect_version_map_["RandomNormal"] = {1}; op_dialect_version_map_["RandomNormalLike"] = {1}; diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 03cdae080c..445f88e318 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -898,9 +898,10 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike", the same data type as the elements of the second input tensor. See documentation of the Cast operator for further details. }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input, - AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type); - let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output); + let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input, + AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type, + DefaultValuedAttr:$saturate); + let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 2; @@ -5685,15 +5686,20 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", let description = [{ The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor. 
The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization. - The quantization formula is y = saturate ((x / y_scale) + y_zero_point). + The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`. For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type. - }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x, - TensorOf<[F32]>:$y_scale, - AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point, - DefaultValuedAttr:$axis); - let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y); + For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. + 'y_zero_point' and 'y' must have same type. + 'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz, + but the quantization formula remains the same for consistency and + the type of the attribute 'y_zero_point' still determines the quantization type. + }]; + let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x, + AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale, + AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point, + DefaultValuedAttr:$axis, + DefaultValuedAttr:$saturate); + let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 3; diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir index eb7196d54a..c84805fa35 100644 --- a/test/mlir/onnx/onnx_shape_inference.mlir +++ b/test/mlir/onnx/onnx_shape_inference.mlir @@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) -> "onnx.Return"(%1) : (tensor<*xf16>) -> () // CHECK-LABEL: test_castlike_1 - // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> + // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16> } @@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor) -> () // CHECK-LABEL: test_quantize_linear_1 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8> } @@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_2 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 
: si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } @@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_3 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext index 6b515dd8d9..8cd8468602 100644 --- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext +++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext @@ -24,10 +24,10 @@ myfun (x) => (y) { // CHECK-LABEL: func.func @main_graph // CHECK-SAME: ([[PARAM_0_:%.+]]: tensor) -> tensor attributes {input_names = ["x"], output_names = ["y"]} { // CHECK: [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor -// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor +// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_3_:%.+]] = onnx.Constant {value_float = 1.000000e+00 : f32} : tensor -// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor // CHECK: [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor, tensor) -> tensor // CHECK: [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor, tensor) -> tensor // CHECK: onnx.Return [[VAR_6_]] : tensor diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py index 4e50c6a724..a793508896 100755 --- a/utils/gen_onnx_mlir.py +++ b/utils/gen_onnx_mlir.py @@ -98,7 +98,7 @@ 'BitwiseXor': [18], 'BlackmanWindow': [17], 'Cast': [19], - 'CastLike': [15], + 'CastLike': [19], 'CastMap': [1], 'CategoryMapper': [1], 'Ceil': [13], @@ -206,7 +206,7 @@ 'Pow': [15], 'QLinearConv': [10], 'QLinearMatMul': [10], - 'QuantizeLinear': [13], + 'QuantizeLinear': [19], 'RNN': [14], 'RandomNormal': [1], 'RandomNormalLike': [1], From 653718ae4b94d6e3f3beff5ea30df8553a69e5b2 Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 13:18:51 -0400 Subject: [PATCH 09/13] Revert "Add frontend support for saturate for CastLike and QuantizeLinear (#2480)" This reverts commit fe7da41d4cea779efc16c6fe4881d8e1fb154395. 
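For reference, the QuantizeLinear semantics being reverted here (and re-applied later in patch 12) follow the op description above: `y = saturate((x / y_scale) + y_zero_point)`, where `x / y_scale` rounds to nearest-even and saturation clamps to [0, 255] for uint8 or [-128, 127] for int8. A minimal element-wise sketch of that formula for the int8 case (illustrative only, not the compiler's actual lowering; the function name quantizeLinearInt8 is made up for this note):

  #include <algorithm>
  #include <cmath>
  #include <cstdint>

  // Sketch of ONNX QuantizeLinear for int8 output. std::nearbyint
  // rounds to nearest-even under the default floating-point
  // environment, matching the op's rounding rule.
  int8_t quantizeLinearInt8(float x, float yScale, int8_t yZeroPoint) {
    float v = std::nearbyint(x / yScale) + static_cast<float>(yZeroPoint);
    v = std::min(127.0f, std::max(-128.0f, v)); // saturate to int8 range
    return static_cast<int8_t>(v);
  }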
Signed-off-by: Megan Hampton --- src/Builder/OpBuildTable.inc | 4 +-- src/Dialect/ONNX/ONNXOps.td.inc | 28 ++++++++----------- test/mlir/onnx/onnx_shape_inference.mlir | 8 +++--- .../functiontest_attrwithdefault.onnxtext | 4 +-- utils/gen_onnx_mlir.py | 4 +-- 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index fb12f255d8..b04431db4a 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18}; op_dialect_version_map_["BitwiseXor"] = {18}; op_dialect_version_map_["BlackmanWindow"] = {17}; op_dialect_version_map_["Cast"] = {19}; -op_dialect_version_map_["CastLike"] = {19}; +op_dialect_version_map_["CastLike"] = {15}; op_dialect_version_map_["CastMap"] = {1}; op_dialect_version_map_["CategoryMapper"] = {1}; op_dialect_version_map_["Ceil"] = {13}; @@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2}; op_dialect_version_map_["Pow"] = {15}; op_dialect_version_map_["QLinearConv"] = {10}; op_dialect_version_map_["QLinearMatMul"] = {10}; -op_dialect_version_map_["QuantizeLinear"] = {19}; +op_dialect_version_map_["QuantizeLinear"] = {13}; op_dialect_version_map_["RNN"] = {14}; op_dialect_version_map_["RandomNormal"] = {1}; op_dialect_version_map_["RandomNormalLike"] = {1}; diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 445f88e318..03cdae080c 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -898,10 +898,9 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike", the same data type as the elements of the second input tensor. See documentation of the Cast operator for further details. }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input, - AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type, - DefaultValuedAttr:$saturate); - let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output); + let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input, + AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type); + let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, 
TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 2; @@ -5686,20 +5685,15 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", let description = [{ The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor. The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization. - The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`. + The quantization formula is y = saturate ((x / y_scale) + y_zero_point). For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. - 'y_zero_point' and 'y' must have same type. - 'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz, - but the quantization formula remains the same for consistency and - the type of the attribute 'y_zero_point' still determines the quantization type. - }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x, - AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale, - AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point, - DefaultValuedAttr:$axis, - DefaultValuedAttr:$saturate); - let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y); + For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type. 
+ }]; + let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x, + TensorOf<[F32]>:$y_scale, + AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point, + DefaultValuedAttr:$axis); + let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 3; diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir index c84805fa35..eb7196d54a 100644 --- a/test/mlir/onnx/onnx_shape_inference.mlir +++ b/test/mlir/onnx/onnx_shape_inference.mlir @@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) -> "onnx.Return"(%1) : (tensor<*xf16>) -> () // CHECK-LABEL: test_castlike_1 - // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> + // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16> // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16> } @@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor) -> () // CHECK-LABEL: test_quantize_linear_1 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xi8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8> } @@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_2 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, tensor) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } @@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor) -> () // CHECK-LABEL: test_quantize_linear_3 - // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> + // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor, none) -> tensor<5x2x3x4xui8> // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8> } diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext index 8cd8468602..6b515dd8d9 100644 --- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext +++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext @@ -24,10 +24,10 @@ myfun (x) => (y) { // CHECK-LABEL: func.func @main_graph // CHECK-SAME: ([[PARAM_0_:%.+]]: tensor) -> tensor attributes {input_names = ["x"], output_names = ["y"]} { // CHECK: [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor -// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor +// CHECK: [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor, tensor) -> tensor // CHECK-DAG: [[VAR_3_:%.+]] = onnx.Constant {value_float = 
1.000000e+00 : f32} : tensor -// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor, tensor) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor, tensor) -> tensor // CHECK: [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor, tensor) -> tensor // CHECK: [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor, tensor) -> tensor // CHECK: onnx.Return [[VAR_6_]] : tensor diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py index a793508896..4e50c6a724 100755 --- a/utils/gen_onnx_mlir.py +++ b/utils/gen_onnx_mlir.py @@ -98,7 +98,7 @@ 'BitwiseXor': [18], 'BlackmanWindow': [17], 'Cast': [19], - 'CastLike': [19], + 'CastLike': [15], 'CastMap': [1], 'CategoryMapper': [1], 'Ceil': [13], @@ -206,7 +206,7 @@ 'Pow': [15], 'QLinearConv': [10], 'QLinearMatMul': [10], - 'QuantizeLinear': [19], + 'QuantizeLinear': [13], 'RNN': [14], 'RandomNormal': [1], 'RandomNormalLike': [1], From 61856c0f066d2c621c75066de14dcbf4e6812c0d Mon Sep 17 00:00:00 2001 From: Megan Hampton Date: Wed, 6 Sep 2023 13:19:34 -0400 Subject: [PATCH 10/13] Revert "Do not remove LLVM "internal" options (#2475)" This reverts commit c9c56314b4097b0802a36bf10b7bc297b3314322. Signed-off-by: Megan Hampton --- src/Compiler/CMakeLists.txt | 6 +++++- src/Compiler/CompilerOptions.cpp | 8 +------- src/Compiler/CompilerUtils.cpp | 1 + src/Compiler/OnnxMlirCompiler.cpp | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Compiler/CMakeLists.txt b/src/Compiler/CMakeLists.txt index 1b086237be..33abd59397 100644 --- a/src/Compiler/CMakeLists.txt +++ b/src/Compiler/CMakeLists.txt @@ -152,7 +152,7 @@ add_onnx_mlir_library(OMCompilerUtils EXCLUDE_FROM_OM_LIBS DEPENDS - ExternalUtil + ExternalUtil llc opt @@ -163,10 +163,12 @@ add_onnx_mlir_library(OMCompilerUtils ${ONNX_MLIR_SRC_ROOT}/include LINK_LIBS PUBLIC + ${OMLibs} OMCompilerDialects OMCompilerPasses OMAccelerator OMVersion + MLIRIR # Link LLVM libraries necessary to query which target architectures # are configured. @@ -191,6 +193,7 @@ add_onnx_mlir_library(OMCompiler DEPENDS OMCompilerUtils + ExternalUtil INCLUDE_DIRS PRIVATE ${FILE_GENERATE_DIR} @@ -201,6 +204,7 @@ add_onnx_mlir_library(OMCompiler EXCLUDE_FROM_OM_LIBS LINK_LIBS PRIVATE + OMCompilerDialects OMCompilerUtils ) diff --git a/src/Compiler/CompilerOptions.cpp b/src/Compiler/CompilerOptions.cpp index 2d6e78964b..e0dfe37a0f 100644 --- a/src/Compiler/CompilerOptions.cpp +++ b/src/Compiler/CompilerOptions.cpp @@ -1010,13 +1010,7 @@ std::string getToolPath( // result in a unknown option error. void removeUnrelatedOptions( const std::vector Categories) { - // Do not remove LLVM "internal" options such as --debug - // that do not have a category (and therefore placed - // under the general category). So we add the general - // category to the list of not-really-hidden options. 
- std::vector optCategories(Categories); - optCategories.push_back(&llvm::cl::getGeneralCategory()); - llvm::cl::HideUnrelatedOptions(optCategories); + llvm::cl::HideUnrelatedOptions(Categories); llvm::StringMap &optMap = llvm::cl::getRegisteredOptions(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 284e717b35..44c9dbefa0 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -39,6 +39,7 @@ #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerPasses.hpp" #include "src/Compiler/HeapReporter.hpp" +#include "src/Dialect/ONNX/ONNXDialect.hpp" #include "src/Version/Version.hpp" #include diff --git a/src/Compiler/OnnxMlirCompiler.cpp b/src/Compiler/OnnxMlirCompiler.cpp index a1345e7c49..2cd345c214 100644 --- a/src/Compiler/OnnxMlirCompiler.cpp +++ b/src/Compiler/OnnxMlirCompiler.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "include/OnnxMlirCompiler.h" +#include "src/Compiler/CompilerDialects.hpp" #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerUtils.hpp" #include "llvm/Support/FileSystem.h" From 718335c9859d3d3faac165a6393605f1c01bdc14 Mon Sep 17 00:00:00 2001 From: gongsu832 Date: Tue, 5 Sep 2023 17:58:02 -0400 Subject: [PATCH 11/13] Do not remove LLVM "internal" options (#2475) * - Do not remove LLVM "internal" options such as --debug - Clean up some header includes and library link dependencies Signed-off-by: Gong Su * Clang format Signed-off-by: Gong Su * Fix general options not showing with --help-hidden|--help-list-hidden Signed-off-by: Gong Su --------- Signed-off-by: Gong Su Signed-off-by: Megan Hampton --- src/Compiler/CMakeLists.txt | 6 +----- src/Compiler/CompilerOptions.cpp | 8 +++++++- src/Compiler/CompilerUtils.cpp | 1 - src/Compiler/OnnxMlirCompiler.cpp | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Compiler/CMakeLists.txt b/src/Compiler/CMakeLists.txt index 33abd59397..1b086237be 100644 --- a/src/Compiler/CMakeLists.txt +++ b/src/Compiler/CMakeLists.txt @@ -152,7 +152,7 @@ add_onnx_mlir_library(OMCompilerUtils EXCLUDE_FROM_OM_LIBS DEPENDS - ExternalUtil + ExternalUtil llc opt @@ -163,12 +163,10 @@ add_onnx_mlir_library(OMCompilerUtils ${ONNX_MLIR_SRC_ROOT}/include LINK_LIBS PUBLIC - ${OMLibs} OMCompilerDialects OMCompilerPasses OMAccelerator OMVersion - MLIRIR # Link LLVM libraries necessary to query which target architectures # are configured. @@ -193,7 +191,6 @@ add_onnx_mlir_library(OMCompiler DEPENDS OMCompilerUtils - ExternalUtil INCLUDE_DIRS PRIVATE ${FILE_GENERATE_DIR} @@ -204,7 +201,6 @@ add_onnx_mlir_library(OMCompiler EXCLUDE_FROM_OM_LIBS LINK_LIBS PRIVATE - OMCompilerDialects OMCompilerUtils ) diff --git a/src/Compiler/CompilerOptions.cpp b/src/Compiler/CompilerOptions.cpp index e0dfe37a0f..2d6e78964b 100644 --- a/src/Compiler/CompilerOptions.cpp +++ b/src/Compiler/CompilerOptions.cpp @@ -1010,7 +1010,13 @@ std::string getToolPath( // result in a unknown option error. void removeUnrelatedOptions( const std::vector Categories) { - llvm::cl::HideUnrelatedOptions(Categories); + // Do not remove LLVM "internal" options such as --debug + // that do not have a category (and therefore placed + // under the general category). So we add the general + // category to the list of not-really-hidden options. 
+ std::vector optCategories(Categories); + optCategories.push_back(&llvm::cl::getGeneralCategory()); + llvm::cl::HideUnrelatedOptions(optCategories); llvm::StringMap &optMap = llvm::cl::getRegisteredOptions(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 44c9dbefa0..284e717b35 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -39,7 +39,6 @@ #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerPasses.hpp" #include "src/Compiler/HeapReporter.hpp" -#include "src/Dialect/ONNX/ONNXDialect.hpp" #include "src/Version/Version.hpp" #include diff --git a/src/Compiler/OnnxMlirCompiler.cpp b/src/Compiler/OnnxMlirCompiler.cpp index 2cd345c214..a1345e7c49 100644 --- a/src/Compiler/OnnxMlirCompiler.cpp +++ b/src/Compiler/OnnxMlirCompiler.cpp @@ -10,7 +10,6 @@ //===----------------------------------------------------------------------===// #include "include/OnnxMlirCompiler.h" -#include "src/Compiler/CompilerDialects.hpp" #include "src/Compiler/CompilerOptions.hpp" #include "src/Compiler/CompilerUtils.hpp" #include "llvm/Support/FileSystem.h" From 8097425bd0fcfdec264dadced6c52268a1bfc564 Mon Sep 17 00:00:00 2001 From: Philip Lassen Date: Tue, 5 Sep 2023 17:27:14 -0700 Subject: [PATCH 12/13] Add frontend support for saturate for CastLike and QuantizeLinear (#2480) Signed-off-by: philass Co-authored-by: Soren Lassen Signed-off-by: Megan Hampton --- src/Builder/OpBuildTable.inc | 4 +-- src/Dialect/ONNX/ONNXOps.td.inc | 28 +++++++++++-------- test/mlir/onnx/onnx_shape_inference.mlir | 8 +++--- .../functiontest_attrwithdefault.onnxtext | 4 +-- utils/gen_onnx_mlir.py | 4 +-- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index b04431db4a..fb12f255d8 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -29,7 +29,7 @@ op_dialect_version_map_["BitwiseOr"] = {18}; op_dialect_version_map_["BitwiseXor"] = {18}; op_dialect_version_map_["BlackmanWindow"] = {17}; op_dialect_version_map_["Cast"] = {19}; -op_dialect_version_map_["CastLike"] = {15}; +op_dialect_version_map_["CastLike"] = {19}; op_dialect_version_map_["CastMap"] = {1}; op_dialect_version_map_["CategoryMapper"] = {1}; op_dialect_version_map_["Ceil"] = {13}; @@ -137,7 +137,7 @@ op_dialect_version_map_["Pad"] = {18, 13, 11, 2}; op_dialect_version_map_["Pow"] = {15}; op_dialect_version_map_["QLinearConv"] = {10}; op_dialect_version_map_["QLinearMatMul"] = {10}; -op_dialect_version_map_["QuantizeLinear"] = {13}; +op_dialect_version_map_["QuantizeLinear"] = {19}; op_dialect_version_map_["RNN"] = {14}; op_dialect_version_map_["RandomNormal"] = {1}; op_dialect_version_map_["RandomNormalLike"] = {1}; diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 03cdae080c..445f88e318 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -898,9 +898,10 @@ def ONNXCastLikeOp:ONNX_Op<"CastLike", the same data type as the elements of the second input tensor. See documentation of the Cast operator for further details. 
}]; - let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$input, - AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$target_type); - let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>]>:$output); + let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$input, + AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$target_type, + DefaultValuedAttr:$saturate); + let results = (outs AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[I8]>, TensorOf<[I16]>, TensorOf<[I32]>, TensorOf<[I64]>, TensorOf<[UI8]>, TensorOf<[UI16]>, TensorOf<[UI32]>, TensorOf<[UI64]>, TensorOf<[I1]>, TensorOf<[StringType]>, TensorOf<[BF16]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$output); let extraClassDeclaration = [{ static int getNumberOfOperands() { return 2; @@ -5685,15 +5686,20 @@ def ONNXQuantizeLinearOp:ONNX_Op<"QuantizeLinear", let description = [{ The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor. The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization. - The quantization formula is y = saturate ((x / y_scale) + y_zero_point). + The quantization formula is `y = saturate ((x / y_scale) + y_zero_point)`. For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. 'y_zero_point' and 'y' must have same type. - }]; - let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[I32]>]>:$x, - TensorOf<[F32]>:$y_scale, - AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, NoneType]>:$y_zero_point, - DefaultValuedAttr:$axis); - let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>]>:$y); + For (x / y_scale), it's rounding to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details. + 'y_zero_point' and 'y' must have same type. 
+  'y_zero_point' is usually not used for quantization to float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz,
+  but the quantization formula remains the same for consistency and
+  the type of the attribute 'y_zero_point' still determines the quantization type.
+  }];
+  let arguments = (ins AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$x,
+    AnyTypeOf<[TensorOf<[F32]>, TensorOf<[F16]>, TensorOf<[BF16]>, TensorOf<[I32]>]>:$y_scale,
+    AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>, NoneType]>:$y_zero_point,
+    DefaultValuedAttr<SI64Attr, "1">:$axis,
+    DefaultValuedAttr<SI64Attr, "1">:$saturate);
+  let results = (outs AnyTypeOf<[TensorOf<[I8]>, TensorOf<[UI8]>, TensorOf<[F8E4M3FN]>, TensorOf<[F8E4M3FNUZ]>, TensorOf<[F8E5M2]>, TensorOf<[F8E5M2FNUZ]>]>:$y);
   let extraClassDeclaration = [{
     static int getNumberOfOperands() {
       return 3;
diff --git a/test/mlir/onnx/onnx_shape_inference.mlir b/test/mlir/onnx/onnx_shape_inference.mlir
index eb7196d54a..c84805fa35 100644
--- a/test/mlir/onnx/onnx_shape_inference.mlir
+++ b/test/mlir/onnx/onnx_shape_inference.mlir
@@ -1710,7 +1710,7 @@ func.func @test_castlike_1(%arg0 : tensor<2x3x4xf32>, %arg1 : tensor<2xf16>) -> tensor<*xf16> {
   "onnx.Return"(%1) : (tensor<*xf16>) -> ()
 
   // CHECK-LABEL: test_castlike_1
-  // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16>
+  // CHECK: [[RES:%.+]] = "onnx.CastLike"(%arg0, %arg1) {saturate = 1 : si64} : (tensor<2x3x4xf32>, tensor<2xf16>) -> tensor<2x3x4xf16>
   // CHECK: onnx.Return [[RES]] : tensor<2x3x4xf16>
 }
 
@@ -1739,7 +1739,7 @@ func.func @test_quantize_linear_1(%arg0 : tensor<5x2x3x4xf32>, %arg1 : tensor<f32>, %arg2 : tensor<i8>) -> tensor<*xi8> {
   "onnx.Return"(%0) : (tensor<*xi8>) -> ()
 
   // CHECK-LABEL: test_quantize_linear_1
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<i8>) -> tensor<5x2x3x4xi8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<i8>) -> tensor<5x2x3x4xi8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xi8>
 }
 
@@ -1750,7 +1750,7 @@ func.func @test_quantize_linear_2(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor<f32>, %arg2: tensor<ui8>) -> tensor<*xui8> {
   "onnx.Return"(%0) : (tensor<*xui8>) -> ()
 
   // CHECK-LABEL: test_quantize_linear_2
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<5x2x3x4xui8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %arg2) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<5x2x3x4xui8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8>
 }
 
@@ -1762,7 +1762,7 @@ func.func @test_quantize_linear_3(%arg0 : tensor<5x2x3x4xf32>, %arg1: tensor<f32>) -> tensor<*xui8> {
   "onnx.Return"(%1) : (tensor<*xui8>) -> ()
 
   // CHECK-LABEL: test_quantize_linear_3
-  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, none) -> tensor<5x2x3x4xui8>
+  // CHECK: [[RES:%.+]] = "onnx.QuantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, saturate = 1 : si64} : (tensor<5x2x3x4xf32>, tensor<f32>, none) -> tensor<5x2x3x4xui8>
   // CHECK: onnx.Return [[RES]] : tensor<5x2x3x4xui8>
 }
 
diff --git a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
index 6b515dd8d9..8cd8468602 100644
--- a/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
+++ b/test/mlir/onnx/parse/functiontest_attrwithdefault.onnxtext
@@ -24,10 +24,10 @@ myfun (x) => (y) {
 // CHECK-LABEL:  func.func @main_graph
 // CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<f32>) -> tensor<f32> attributes {input_names = ["x"], output_names = ["y"]} {
 // CHECK:           [[VAR_0_:%.+]] = onnx.Constant {value_float = 2.000000e+00 : f32} : tensor<f32>
-// CHECK:           [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+// CHECK:           [[VAR_1_:%.+]] = "onnx.CastLike"([[VAR_0_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK-DAG:       [[VAR_2_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_1_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK-DAG:       [[VAR_3_:%.+]] = onnx.Constant {value_float = 1.000000e+00 : f32} : tensor<f32>
-// CHECK:           [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+// CHECK:           [[VAR_4_:%.+]] = "onnx.CastLike"([[VAR_3_]], [[PARAM_0_]]) {saturate = 1 : si64} : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           [[VAR_5_:%.+]] = "onnx.Add"([[PARAM_0_]], [[VAR_4_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           [[VAR_6_:%.+]] = "onnx.Add"([[VAR_2_]], [[VAR_5_]]) : (tensor<f32>, tensor<f32>) -> tensor<f32>
 // CHECK:           onnx.Return [[VAR_6_]] : tensor<f32>
diff --git a/utils/gen_onnx_mlir.py b/utils/gen_onnx_mlir.py
index 4e50c6a724..a793508896 100755
--- a/utils/gen_onnx_mlir.py
+++ b/utils/gen_onnx_mlir.py
@@ -98,7 +98,7 @@
     'BitwiseXor': [18],
     'BlackmanWindow': [17],
     'Cast': [19],
-    'CastLike': [15],
+    'CastLike': [19],
     'CastMap': [1],
     'CategoryMapper': [1],
     'Ceil': [13],
@@ -206,7 +206,7 @@
     'Pow': [15],
     'QLinearConv': [10],
     'QLinearMatMul': [10],
-    'QuantizeLinear': [13],
+    'QuantizeLinear': [19],
     'RNN': [14],
     'RandomNormal': [1],
     'RandomNormalLike': [1],

From 13cbadaf5ca8c94c46e4097432b22b252d6a7264 Mon Sep 17 00:00:00 2001
From: Megan Hampton
Date: Wed, 6 Sep 2023 14:19:01 -0400
Subject: [PATCH 13/13] Add data type for python script

Signed-off-by: Megan Hampton
---
 test/backend/inference_backend.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/backend/inference_backend.py b/test/backend/inference_backend.py
index 9e58bd08c4..4a4e57e8fc 100644
--- a/test/backend/inference_backend.py
+++ b/test/backend/inference_backend.py
@@ -1375,6 +1375,7 @@ def JniExecutionSession(jar_name, inputs):
         "f2": np.float16,
         "f4": np.float32,
         "f8": np.float64,
+        "U25": np.str_,
     }
 
     # print('stdout=' + str(procStdout), file=sys.stderr)
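
For readers following patch 12: the ONNX opset-19 semantics quoted in the
QuantizeLinear description above can be sketched in a few lines of NumPy.
This is an illustration of the spec text only, not onnx-mlir's runtime
kernel; the helper name quantize_linear is invented for the example, and
the float8 cases are omitted.

    import numpy as np

    def quantize_linear(x, y_scale, y_zero_point=0, dtype=np.uint8):
        # y = saturate(round_half_to_even(x / y_scale) + y_zero_point).
        # np.rint rounds ties to the nearest even integer, as the spec requires.
        y = np.rint(x / y_scale).astype(np.int32) + np.int32(y_zero_point)
        # Saturate: [0, 255] for uint8, [-128, 127] for int8.
        info = np.iinfo(dtype)
        return np.clip(y, info.min, info.max).astype(dtype)

    x = np.array([-1.0, 0.25, 126.6], dtype=np.float32)
    print(quantize_linear(x, y_scale=np.float32(0.5), y_zero_point=1))
    # [  0   1 254]

With saturate = 1 (the default recorded by the new DefaultValuedAttr), an
out-of-range result clamps to the target type's range; for the float8
output types the attribute chooses between saturating to the largest
finite value and letting out-of-range values become NaN or infinity.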
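
A note on the "U25" key that patch 13 adds: it is a NumPy dtype string,
where kind "U" means fixed-width Unicode and the digits give the field
width in characters, so "U25" matches string tensors whose elements hold
up to 25 characters. A minimal sketch of how NumPy forms these strings
(illustrative only; the diff does not show why the harness sees width 25
specifically):

    import numpy as np

    a = np.array(["castlike", "quantize"])  # width inferred from longest element
    print(a.dtype)        # <U8 on a little-endian machine
    print(a.dtype.kind)   # 'U' -> maps onto np.str_ in the table above
    print(np.dtype("U25").itemsize)  # 100: 25 characters x 4 bytes each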