From f56fe2ccaa14340e33e9261aae18d947a4261d26 Mon Sep 17 00:00:00 2001
From: xieshuaihu <xieshuaihu@gmail.com>
Date: Mon, 1 Jul 2024 21:32:39 +0800
Subject: [PATCH 1/4] Fix match error in RapidsShuffleIterator.scala
 [scala2.13] (#11115)

Signed-off-by: xieshuaihu <xieshuaihu@gmail.com>
---
 .../nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala
index 29f36d6f2f7..72c1f935eed 100644
--- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala
+++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala
@@ -173,7 +173,7 @@ class RapidsShuffleIterator(
     val (local, remote) = blocksByAddress.partition(ba => ba._1.host == localHost)
 
     (local ++ remote).foreach {
-      case (blockManagerId: BlockManagerId, blockIds: Seq[(BlockId, Long, Int)]) => {
+      case (blockManagerId: BlockManagerId, blockIds: collection.Seq[(BlockId, Long, Int)]) => {
         val shuffleRequestsMapIndex: Seq[BlockIdMapIndex] =
           blockIds.map { case (blockId, _, mapIndex) =>
             /**
@@ -193,7 +193,7 @@ class RapidsShuffleIterator(
                 throw new IllegalArgumentException(
                   s"${blockId.getClass} $blockId is not currently supported")
             }
-          }
+          }.toSeq
 
         val client = try {
           transport.makeClient(blockManagerId)

From 850365ceac86d40531fed74f45221a81db7d9132 Mon Sep 17 00:00:00 2001
From: MithunR <mithunr@nvidia.com>
Date: Mon, 1 Jul 2024 13:58:47 -0700
Subject: [PATCH 2/4] Spark 4: Handle ANSI mode in sort_test.py (#11099)

* Spark 4: Handle ANSI mode in sort_test.py

Fixes #11027.

With ANSI mode enabled (like the default in Spark 4), one sees that some
tests in `sort_test.py` fail, because they expect ANSI mode to be off.

This commit disables running those tests with ANSI enabled, and add a
separate test for ANSI on/off.

Signed-off-by: MithunR <mithunr@nvidia.com>

* Refactored not to use disable_ansi_mode.

These tests need not be revisited.  They test all combinations of ANSI mode,
including overflow failures.

Signed-off-by: MithunR <mithunr@nvidia.com>

---------

Signed-off-by: MithunR <mithunr@nvidia.com>
---
 .../src/main/python/sort_test.py              | 66 +++++++++++++++----
 1 file changed, 52 insertions(+), 14 deletions(-)

diff --git a/integration_tests/src/main/python/sort_test.py b/integration_tests/src/main/python/sort_test.py
index cb905c9fb77..3fe406d180a 100644
--- a/integration_tests/src/main/python/sort_test.py
+++ b/integration_tests/src/main/python/sort_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 
 import pytest
 
-from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect
+from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error, assert_gpu_fallback_collect
 from conftest import is_not_utc
 from data_gen import *
 from marks import allow_non_gpu
@@ -224,29 +224,67 @@ def test_multi_orderby_with_limit_single_part(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
             lambda spark : binary_op_df(spark, data_gen).coalesce(1).orderBy(f.col('a'), f.col('b').desc()).limit(100))
 
+
 # We are not trying all possibilities, just doing a few with numbers so the query works.
-@pytest.mark.parametrize('data_gen', [byte_gen, long_gen, float_gen], ids=idfn)
-def test_orderby_with_processing(data_gen):
+@pytest.mark.parametrize('is_ansi_enabled', [False, True])
+@pytest.mark.parametrize('data_gen', [ByteGen, LongGen, FloatGen], ids=idfn)
+def test_orderby_with_processing(data_gen, is_ansi_enabled):
+    """
+    Tests the cases where arithmetic overflows don't occur, in ANSI mode.
+    Overflow exceptions are tested in test_orderby_with_ansi_overflow_exceptions.
+    """
+    conf = {'spark.sql.ansi.enabled': is_ansi_enabled}
+    gen = data_gen(min_val=0) if (is_ansi_enabled and data_gen != FloatGen) else data_gen()
     assert_gpu_and_cpu_are_equal_collect(
-            # avoid ambiguity in the order by statement for floating point by including a as a backup ordering column
-            lambda spark : unary_op_df(spark, data_gen).orderBy(f.lit(100) - f.col('a'), f.col('a')))
+            # avoid ambiguity in the order by statement for floating point by including `a` as a backup ordering column
+            lambda spark: unary_op_df(spark, gen).orderBy(f.lit(100) - f.col('a'), f.col('a')),
+            conf=conf)
+
+
+@pytest.mark.parametrize('data_gen', [long_gen], ids=idfn)
+def test_orderby_with_ansi_overflow_exceptions(data_gen):
+    """
+    Test to check that ANSI mode is honoured when there's an order-by with a subtraction expression.
+    With ANSI mode enabled, the subtraction will overflow, causing an ArithmeticException.
+    """
+    def test_function(spark):
+        return unary_op_df(spark, data_gen).orderBy(f.lit(100) - f.col('a'), f.col('a'))
+
+    assert_gpu_and_cpu_error(lambda spark: test_function(spark).collect(),
+                             conf=ansi_enabled_conf,
+                             error_message='ArithmeticException')
+
 
 # We are not trying all possibilities, just doing a few with numbers so the query works.
-@pytest.mark.parametrize('data_gen', [byte_gen, long_gen, float_gen], ids=idfn)
-def test_orderby_with_processing_and_limit(data_gen):
+@pytest.mark.parametrize('is_ansi_enabled', [False, True])
+@pytest.mark.parametrize('data_gen', [ByteGen, LongGen, FloatGen], ids=idfn)
+def test_orderby_with_processing_and_limit(data_gen, is_ansi_enabled):
+    """
+    Tests the cases where arithmetic overflows don't occur, in ANSI mode.
+    Overflow exceptions are tested in test_orderby_with_ansi_overflow_exceptions.
+    """
+    conf = {'spark.sql.ansi.enabled': is_ansi_enabled}
+    gen = data_gen(min_val=0) if (is_ansi_enabled and data_gen != FloatGen) else data_gen()
     assert_gpu_and_cpu_are_equal_collect(
-            # avoid ambiguity in the order by statement for floating point by including a as a backup ordering column
-            lambda spark : unary_op_df(spark, data_gen).orderBy(f.lit(100) - f.col('a'), f.col('a')).limit(100))
+        # avoid ambiguity in the order by statement for floating point by including a as a backup ordering column
+        lambda spark: unary_op_df(spark, gen).orderBy(f.lit(100) - f.col('a'), f.col('a')).limit(100), conf=conf)
 
 
 # We are not trying all possibilities, just doing a few with numbers so the query works.
+@pytest.mark.parametrize('is_ansi_enabled', [False, True])
 @pytest.mark.parametrize('data_gen', [StructGen([('child0', long_gen)])], ids=idfn)
-def test_single_nested_orderby_with_processing_and_limit(data_gen):
+def test_single_nested_orderby_with_processing_and_limit(data_gen, is_ansi_enabled):
+    """
+    Tests the cases where arithmetic overflows don't occur, in ANSI mode.
+    Overflow exceptions are tested in test_orderby_with_ansi_overflow_exceptions.
+    """
+    conf = {'spark.sql.ansi.enabled': is_ansi_enabled}
+    data_gen = StructGen([('child0', LongGen(min_val=0) if is_ansi_enabled else LongGen())])
     assert_gpu_and_cpu_are_equal_collect(
         # avoid ambiguity in the order by statement for floating point by including a as a backup ordering column
-        lambda spark : unary_op_df(spark, data_gen)\
-            .orderBy(f.struct(f.lit(100) - f.col('a.child0')), f.col('a'))\
-            .limit(100))
+        lambda spark: unary_op_df(spark, data_gen)\
+                        .orderBy(f.struct(f.lit(100) - f.col('a.child0')), f.col('a')).limit(100),
+        conf=conf)
 
 # We are not trying all possibilities, just doing a few with numbers so the query works.
 @pytest.mark.parametrize('data_gen', [byte_gen, long_gen, float_gen], ids=idfn)

From 9bb295a21ce7645aa51551ca83716cd33dce722e Mon Sep 17 00:00:00 2001
From: Renjie Liu <liurenjie2008@gmail.com>
Date: Tue, 2 Jul 2024 09:40:29 +0800
Subject: [PATCH 3/4] Introduce LORE framework. (#11084)

* Introduce lore id

* Introduce lore id

* Fix type

* Fix type

* Conf

* style

* part

* Dump

* Introduce lore framework

* Add tests.

* Rename test case

Signed-off-by: liurenjie1024 <liurenjie2008@gmail.com>

* Fix AQE test

* Fix style

* Use args to display lore info.

* Fix build break

* Fix path in loreinfo

* Remove path

* Fix comments

* Update configs

* Fix comments

* Fix config

---------

Signed-off-by: liurenjie1024 <liurenjie2008@gmail.com>
---
 .../advanced_configs.md                       |   2 +
 docs/dev/lore.md                              |  70 +++++
 .../com/nvidia/spark/rapids/DumpUtils.scala   |  28 +-
 .../spark/rapids/GpuAggregateExec.scala       |   9 +-
 .../com/nvidia/spark/rapids/GpuExec.scala     |  30 +-
 .../nvidia/spark/rapids/GpuOverrides.scala    |   8 +-
 .../spark/rapids/GpuTransitionOverrides.scala |   5 +
 .../com/nvidia/spark/rapids/RapidsConf.scala  |  31 ++
 .../nvidia/spark/rapids/lore/GpuLore.scala    | 295 ++++++++++++++++++
 .../spark/rapids/lore/OutputLoreId.scala      |  75 +++++
 .../com/nvidia/spark/rapids/lore/dump.scala   | 106 +++++++
 .../nvidia/spark/rapids/lore/package.scala    |  35 +++
 .../com/nvidia/spark/rapids/lore/replay.scala | 102 ++++++
 .../execution/GpuBroadcastExchangeExec.scala  |  18 +-
 .../execution/datasources/GpuWriteFiles.scala |   2 +-
 .../spark/rapids/lore/GpuLoreSuite.scala      | 169 ++++++++++
 .../spark/rapids/lore/OutputLoreIdSuite.scala |  55 ++++
 17 files changed, 1029 insertions(+), 11 deletions(-)
 create mode 100644 docs/dev/lore.md
 create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/GpuLore.scala
 create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/OutputLoreId.scala
 create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/dump.scala
 create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/package.scala
 create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/replay.scala
 create mode 100644 tests/src/test/scala/com/nvidia/spark/rapids/lore/GpuLoreSuite.scala
 create mode 100644 tests/src/test/scala/com/nvidia/spark/rapids/lore/OutputLoreIdSuite.scala

diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md
index 033e332b99c..f5d511cbbc5 100644
--- a/docs/additional-functionality/advanced_configs.md
+++ b/docs/additional-functionality/advanced_configs.md
@@ -136,6 +136,8 @@ Name | Description | Default Value | Applicable at
 <a name="sql.json.read.decimal.enabled"></a>spark.rapids.sql.json.read.decimal.enabled|When reading a quoted string as a decimal Spark supports reading non-ascii unicode digits, and the RAPIDS Accelerator does not.|true|Runtime
 <a name="sql.json.read.double.enabled"></a>spark.rapids.sql.json.read.double.enabled|JSON reading is not 100% compatible when reading doubles.|true|Runtime
 <a name="sql.json.read.float.enabled"></a>spark.rapids.sql.json.read.float.enabled|JSON reading is not 100% compatible when reading floats.|true|Runtime
+<a name="sql.lore.dumpPath"></a>spark.rapids.sql.lore.dumpPath|The path to dump the LORE nodes' input data. This must be set if spark.rapids.sql.lore.idsToDump has been set. The data of each LORE node will be dumped to a subfolder with name 'loreId-<LORE id>' under this path. For more details, please refer to [the LORE documentation](../dev/lore.md).|None|Runtime
+<a name="sql.lore.idsToDump"></a>spark.rapids.sql.lore.idsToDump|Specify the LORE ids of operators to dump. The format is a comma separated list of LORE ids. For example: "1[0]" will dump partition 0 of input of gpu operator with lore id 1. For more details, please refer to [the LORE documentation](../dev/lore.md). If this is not set, no data will be dumped.|None|Runtime
 <a name="sql.mode"></a>spark.rapids.sql.mode|Set the mode for the Rapids Accelerator. The supported modes are explainOnly and executeOnGPU. This config can not be changed at runtime, you must restart the application for it to take affect. The default mode is executeOnGPU, which means the RAPIDS Accelerator plugin convert the Spark operations and execute them on the GPU when possible. The explainOnly mode allows running queries on the CPU and the RAPIDS Accelerator will evaluate the queries as if it was going to run on the GPU. The explanations of what would have run on the GPU and why are output in log messages. When using explainOnly mode, the default explain output is ALL, this can be changed by setting spark.rapids.sql.explain. See that config for more details.|executeongpu|Startup
 <a name="sql.optimizer.joinReorder.enabled"></a>spark.rapids.sql.optimizer.joinReorder.enabled|When enabled, joins may be reordered for improved query performance|true|Runtime
 <a name="sql.python.gpu.enabled"></a>spark.rapids.sql.python.gpu.enabled|This is an experimental feature and is likely to change in the future. Enable (true) or disable (false) support for scheduling Python Pandas UDFs with GPU resources. When enabled, pandas UDFs are assumed to share the same GPU that the RAPIDs accelerator uses and will honor the python GPU configs|false|Runtime
diff --git a/docs/dev/lore.md b/docs/dev/lore.md
new file mode 100644
index 00000000000..d6b28877ae7
--- /dev/null
+++ b/docs/dev/lore.md
@@ -0,0 +1,70 @@
+---
+layout: page
+title: The Local Replay Framework
+nav_order: 13
+parent: Developer Overview
+---
+
+# Local Replay Framework
+
+## Overview
+
+LORE (the local replay framework) is a tool that allows developer to replay the execution of a 
+gpu operator in local environment, so that developer could debug and profile the operator for 
+performance analysis. In high level it works as follows:
+
+1. Each gpu operator will be assigned a LORE id, which is a unique identifier for the operator. 
+   This id is guaranteed to be unique within the same query, and guaranteed to be same when two 
+   sql executions have same sql, same configuration, and same data. 
+2. In the first run of the query, developer could found the LORE id of the operator they are 
+   interested in by checking spark ui, where LORE id usually appears in the arguments of operator.
+3. In the second run of the query, developer needs to configure the LORE ids of the operators they 
+   are interested in, and LORE will dump the input data of the operator to given path.
+4. Developer could copy the dumped data to local environment, and replay the operator in local 
+   environment.
+
+## Configuration
+
+By default, LORE id will always be generated for operators, but user could disable this behavior 
+by setting `spark.rapids.sql.lore.tag.enabled` to `false`. 
+
+To tell LORE the LORE ids of the operators you are interested in, you need to set 
+`spark.rapids.sql.lore.idsToDump`. For example, you could set it to "1[*], 2[*], 3[*]" to tell 
+LORE to dump all partitions of input data of operators with id 1, 2, or 3. You can also only dump 
+some partition of the operator's input by appending partition numbers to lore ids. For example, 
+"1[0 4-6 7], 2[*]" tell LORE to dump operator with LORE id 1, but only dump partition 0, 4, 5, 6, 
+and 7. But for operator with LORE id 2, it will dump all partitions. 
+
+You also need to set `spark.rapids.sql.lore.dumpPath` to tell LORE where to dump the data, the 
+value of which should point to a directory. All dumped data of a query will live in this 
+directory. A typical directory hierarchy would look like this:
+
+```console
++ loreId-10/
+  - plan.meta
+  + input-0/
+    - rdd.meta
+    + partition-0/
+      - partition.meta
+      - batch-0.parquet
+      - batch-1.parquet
+    + partition-1/
+      - partition.meta
+      - batch-0.parquet
+  + input-1/
+    - rdd.meta
+    + partition-0/
+      - partition.meta
+      - batch-0.parquet
+      - batch-1.parquet
+ 
++ loreId-15/
+  - plan.meta
+  + input-0/
+    - rdd.meta
+    + partition-0/
+      - partition.meta
+      - batch-0.parquet
+```
+
+
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/DumpUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/DumpUtils.scala
index bf949897c78..21d2de6ad68 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/DumpUtils.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/DumpUtils.scala
@@ -15,7 +15,7 @@
  */
 package com.nvidia.spark.rapids
 
-import java.io.{File, FileOutputStream}
+import java.io.{File, FileOutputStream, OutputStream}
 import java.util.Random
 
 import scala.collection.mutable
@@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer
 
 import ai.rapids.cudf._
 import ai.rapids.cudf.ColumnWriterOptions._
-import com.nvidia.spark.rapids.Arm.withResource
+import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
 import org.apache.commons.io.IOUtils
 import org.apache.hadoop.conf.Configuration
 
@@ -82,6 +82,23 @@ object DumpUtils extends Logging {
     }
   }
 
+  /**
+   * Dump columnar batch to output stream in parquet format. <br>
+   *
+   * @param columnarBatch The columnar batch to be dumped, should be GPU columnar batch. It
+   *                      should be closed by caller.
+   * @param outputStream Will be closed after writing.
+   */
+  def dumpToParquet(columnarBatch: ColumnarBatch, outputStream: OutputStream): Unit = {
+    closeOnExcept(outputStream) { _ =>
+      withResource(GpuColumnVector.from(columnarBatch)) { table =>
+        withResource(new ParquetDumper(outputStream, table)) { dumper =>
+          dumper.writeTable(table)
+        }
+      }
+    }
+  }
+
   /**
    * Debug utility to dump table to parquet file. <br>
    * It's running on GPU. Parquet column names are generated from table column type info. <br>
@@ -129,12 +146,15 @@ object DumpUtils extends Logging {
 }
 
 // parquet dumper
-class ParquetDumper(path: String, table: Table) extends HostBufferConsumer
+class ParquetDumper(private val outputStream: OutputStream, table: Table) extends HostBufferConsumer
   with AutoCloseable {
-  private[this] val outputStream = new FileOutputStream(path)
   private[this] val tempBuffer = new Array[Byte](128 * 1024)
   private[this] val buffers = mutable.Queue[(HostMemoryBuffer, Long)]()
 
+  def this(path: String, table: Table) = {
+    this(new FileOutputStream(path), table)
+  }
+
   val tableWriter: TableWriter = {
     // avoid anything conversion, just dump as it is
     val builder = ParquetDumper.parquetWriterOptionsFromTable(ParquetWriterOptions.builder(), table)
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
index 7e6a1056d01..b35e687d185 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
@@ -1827,6 +1827,7 @@ case class GpuHashAggregateExec(
        |${ExplainUtils.generateFieldString("Functions", aggregateExpressions)}
        |${ExplainUtils.generateFieldString("Aggregate Attributes", aggregateAttributes)}
        |${ExplainUtils.generateFieldString("Results", resultExpressions)}
+       |Lore: ${loreArgs.mkString(", ")}
        |""".stripMargin
   }
 
@@ -1955,10 +1956,12 @@ case class GpuHashAggregateExec(
       truncatedString(allAggregateExpressions, "[", ", ", "]", maxFields)
     val outputString = truncatedString(output, "[", ", ", "]", maxFields)
     if (verbose) {
-      s"GpuHashAggregate(keys=$keyString, functions=$functionString, output=$outputString)"
+      s"$nodeName (keys=$keyString, functions=$functionString, output=$outputString) " +
+        s"""${loreArgs.mkString(", ")}"""
     } else {
-      s"GpuHashAggregate(keys=$keyString, functions=$functionString)," +
-          s" filters=${aggregateExpressions.map(_.filter)})"
+      s"$nodeName (keys=$keyString, functions=$functionString)," +
+          s" filters=${aggregateExpressions.map(_.filter)})" +
+          s""" ${loreArgs.mkString(", ")}"""
     }
   }
   //
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuExec.scala
index d83f20113b2..e93ac40b5bd 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuExec.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuExec.scala
@@ -19,7 +19,10 @@ package com.nvidia.spark.rapids
 import ai.rapids.cudf.NvtxColor
 import com.nvidia.spark.rapids.Arm.withResource
 import com.nvidia.spark.rapids.filecache.FileCacheConf
+import com.nvidia.spark.rapids.lore.{GpuLore, GpuLoreDumpRDD}
+import com.nvidia.spark.rapids.lore.GpuLore.{loreIdOf, LORE_DUMP_PATH_TAG, LORE_DUMP_RDD_TAG}
 import com.nvidia.spark.rapids.shims.SparkShimImpl
+import org.apache.hadoop.fs.Path
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.rapids.LocationPreservingMapPartitionsRDD
@@ -385,7 +388,8 @@ trait GpuExec extends SparkPlan {
     this.getTagValue(GpuExec.TASK_METRICS_TAG)
 
   final override def doExecuteColumnar(): RDD[ColumnarBatch] = {
-    val orig = internalDoExecuteColumnar()
+    this.dumpLoreMetaInfo()
+    val orig = this.dumpLoreRDD(internalDoExecuteColumnar())
     val metrics = getTaskMetrics
     metrics.map { gpuMetrics =>
       // This is ugly, but it reduces the need to change all exec nodes, so we are doing it here
@@ -396,5 +400,29 @@ trait GpuExec extends SparkPlan {
     }.getOrElse(orig)
   }
 
+  override def stringArgs: Iterator[Any] = super.stringArgs ++ loreArgs
+
+  protected def loreArgs: Iterator[String] = {
+    val loreIdStr = loreIdOf(this).map(id => s"[loreId=$id]")
+    val lorePathStr = getTagValue(LORE_DUMP_PATH_TAG).map(path => s"[lorePath=$path]")
+    val loreRDDInfoStr = getTagValue(LORE_DUMP_RDD_TAG).map(info => s"[loreRDDInfo=$info]")
+
+    List(loreIdStr, lorePathStr, loreRDDInfoStr).flatten.iterator
+  }
+
+  private def dumpLoreMetaInfo(): Unit = {
+    getTagValue(LORE_DUMP_PATH_TAG).foreach { rootPath =>
+      GpuLore.dumpPlan(this, new Path(rootPath))
+    }
+  }
+
+  protected def dumpLoreRDD(inner: RDD[ColumnarBatch]): RDD[ColumnarBatch] = {
+    getTagValue(LORE_DUMP_RDD_TAG).map { info =>
+      val rdd = new GpuLoreDumpRDD(info, inner)
+      rdd.saveMeta()
+      rdd
+    }.getOrElse(inner)
+  }
+
   protected def internalDoExecuteColumnar(): RDD[ColumnarBatch]
 }
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index 9e26cf751f4..73475ef36f5 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -25,6 +25,7 @@ import scala.util.control.NonFatal
 import ai.rapids.cudf.DType
 import com.nvidia.spark.rapids.RapidsConf.{SUPPRESS_PLANNING_FAILURE, TEST_CONF}
 import com.nvidia.spark.rapids.jni.GpuTimeZoneDB
+import com.nvidia.spark.rapids.lore.GpuLore
 import com.nvidia.spark.rapids.shims._
 import com.nvidia.spark.rapids.window.{GpuDenseRank, GpuLag, GpuLead, GpuPercentRank, GpuRank, GpuRowNumber, GpuSpecialFrameBoundary, GpuWindowExecMeta, GpuWindowSpecDefinitionMeta}
 import org.apache.hadoop.fs.Path
@@ -4708,7 +4709,12 @@ case class GpuOverrides() extends Rule[SparkPlan] with Logging {
           }
         }
       }
-      GpuOverrides.doConvertPlan(wrap, conf, optimizations)
+      val convertedPlan = GpuOverrides.doConvertPlan(wrap, conf, optimizations)
+      if (conf.isTagLoreIdEnabled) {
+        GpuLore.tagForLore(convertedPlan, conf)
+      } else {
+        convertedPlan
+      }
     }
   }
 }
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala
index 48f9de5a61a..c8596f983d9 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuTransitionOverrides.scala
@@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicInteger
 import scala.annotation.tailrec
 import scala.collection.mutable
 
+import com.nvidia.spark.rapids.lore.GpuLore
 import com.nvidia.spark.rapids.shims.{GpuBatchScanExec, SparkShimImpl}
 
 import org.apache.spark.SparkContext
@@ -823,6 +824,10 @@ class GpuTransitionOverrides extends Rule[SparkPlan] {
           updatedPlan = fixupAdaptiveExchangeReuse(updatedPlan)
         }
 
+        if (rapidsConf.isTagLoreIdEnabled) {
+          updatedPlan = GpuLore.tagForLore(updatedPlan, rapidsConf)
+        }
+
         if (rapidsConf.logQueryTransformations) {
           logWarning(s"Transformed query:" +
             s"\nOriginal Plan:\n$plan\nTransformed Plan:\n$updatedPlan")
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index aad4f05b334..406d09a7a32 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -23,6 +23,7 @@ import scala.collection.mutable.{HashMap, ListBuffer}
 
 import ai.rapids.cudf.Cuda
 import com.nvidia.spark.rapids.jni.RmmSpark.OomInjectionType
+import com.nvidia.spark.rapids.lore.{LoreId, OutputLoreId}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
@@ -2308,6 +2309,28 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression.
     .booleanConf
     .createWithDefault(false)
 
+  val TAG_LORE_ID_ENABLED = conf("spark.rapids.sql.lore.tag.enabled")
+    .doc("Enable add a LORE id to each gpu plan node")
+    .internal()
+    .booleanConf
+    .createWithDefault(true)
+
+  val LORE_DUMP_IDS = conf("spark.rapids.sql.lore.idsToDump")
+    .doc("Specify the LORE ids of operators to dump. The format is a comma separated list of " +
+      "LORE ids. For example: \"1[0]\" will dump partition 0 of input of gpu operator " +
+      "with lore id 1. For more details, please refer to " +
+      "[the LORE documentation](../dev/lore.md). If this is not set, no data will be dumped.")
+    .stringConf
+    .createOptional
+
+  val LORE_DUMP_PATH = conf("spark.rapids.sql.lore.dumpPath")
+    .doc(s"The path to dump the LORE nodes' input data. This must be set if ${LORE_DUMP_IDS.key} " +
+      "has been set. The data of each LORE node will be dumped to a subfolder with name " +
+      "'loreId-<LORE id>' under this path. For more details, please refer to " +
+      "[the LORE documentation](../dev/lore.md).")
+    .stringConf
+    .createOptional
+
   private def printSectionHeader(category: String): Unit =
     println(s"\n### $category")
 
@@ -3121,6 +3144,14 @@ class RapidsConf(conf: Map[String, String]) extends Logging {
 
   lazy val isDeltaLowShuffleMergeEnabled: Boolean = get(ENABLE_DELTA_LOW_SHUFFLE_MERGE)
 
+  lazy val isTagLoreIdEnabled: Boolean = get(TAG_LORE_ID_ENABLED)
+
+  lazy val loreDumpIds: Map[LoreId, OutputLoreId] = get(LORE_DUMP_IDS)
+    .map(OutputLoreId.parse)
+    .getOrElse(Map.empty)
+
+  lazy val loreDumpPath: Option[String] = get(LORE_DUMP_PATH)
+
   private val optimizerDefaults = Map(
     // this is not accurate because CPU projections do have a cost due to appending values
     // to each row that is produced, but this needs to be a really small number because
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/GpuLore.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/GpuLore.scala
new file mode 100644
index 00000000000..a51a1e13a5e
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/GpuLore.scala
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.lore
+
+import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap}
+import java.util.concurrent.atomic.AtomicInteger
+
+import scala.collection.mutable
+import scala.reflect.ClassTag
+
+import com.nvidia.spark.rapids.{GpuColumnarToRowExec, GpuExec, RapidsConf}
+import com.nvidia.spark.rapids.Arm.withResource
+import com.nvidia.spark.rapids.shims.SparkShimImpl
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.SparkEnv
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.trees.TreeNodeTag
+import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, ReusedSubqueryExec, SparkPlan, SQLExecution}
+import org.apache.spark.sql.execution.adaptive.BroadcastQueryStageExec
+import org.apache.spark.sql.rapids.execution.{GpuBroadcastExchangeExec, GpuCustomShuffleReaderExec}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.util.SerializableConfiguration
+
+case class LoreRDDMeta(numPartitions: Int, outputPartitions: Seq[Int], attrs: Seq[Attribute])
+
+case class LoreRDDPartitionMeta(numBatches: Int, dataType: Seq[DataType])
+
+trait GpuLoreRDD {
+  def rootPath: Path
+
+  def pathOfMeta: Path = new Path(rootPath, "rdd.meta")
+
+  def pathOfPartition(partitionIndex: Int): Path = {
+    new Path(rootPath, s"partition-$partitionIndex")
+  }
+
+  def pathOfPartitionMeta(partitionIndex: Int): Path = {
+    new Path(pathOfPartition(partitionIndex), "partition.meta")
+  }
+
+  def pathOfBatch(partitionIndex: Int, batchIndex: Int): Path = {
+    new Path(pathOfPartition(partitionIndex), s"batch-$batchIndex.parquet")
+  }
+}
+
+
+object GpuLore {
+  /**
+   * Lore id of a plan node.
+   */
+  val LORE_ID_TAG: TreeNodeTag[String] = new TreeNodeTag[String]("rapids.gpu.lore.id")
+  /**
+   * When a [[GpuExec]] node has this tag, it means that this node is a root node whose meta and
+   * input should be dumped.
+   */
+  val LORE_DUMP_PATH_TAG: TreeNodeTag[String] = new TreeNodeTag[String]("rapids.gpu.lore.dump.path")
+  /**
+   * When a [[GpuExec]] node has this tag, it means that this node is a child node whose data
+   * should be dumped.
+   */
+  val LORE_DUMP_RDD_TAG: TreeNodeTag[LoreDumpRDDInfo] = new TreeNodeTag[LoreDumpRDDInfo](
+    "rapids.gpu.lore.dump.rdd.info")
+
+  def pathOfRootPlanMeta(rootPath: Path): Path = {
+    new Path(rootPath, "plan.meta")
+  }
+
+  def dumpPlan[T <: SparkPlan : ClassTag](plan: T, rootPath: Path): Unit = {
+    dumpObject(plan, pathOfRootPlanMeta(rootPath),
+      SparkShimImpl.sessionFromPlan(plan).sparkContext.hadoopConfiguration)
+  }
+
+  def dumpObject[T: ClassTag](obj: T, path: Path, hadoopConf: Configuration): Unit = {
+    withResource(path.getFileSystem(hadoopConf)) { fs =>
+      withResource(fs.create(path, false)) { fout =>
+        val serializerStream = SparkEnv.get.serializer.newInstance().serializeStream(fout)
+        withResource(serializerStream) { ser =>
+          ser.writeObject(obj)
+        }
+      }
+    }
+  }
+
+  def loadObject[T: ClassTag](path: Path, hadoopConf: Configuration): T = {
+    withResource(path.getFileSystem(hadoopConf)) { fs =>
+      withResource(fs.open(path)) { fin =>
+        val serializerStream = SparkEnv.get.serializer.newInstance().deserializeStream(fin)
+        withResource(serializerStream) { ser =>
+          ser.readObject().asInstanceOf[T]
+        }
+      }
+    }
+  }
+
+  def pathOfChild(rootPath: Path, childIndex: Int): Path = {
+    new Path(rootPath, s"input-$childIndex")
+  }
+
+  def restoreGpuExec(rootPath: Path, spark: SparkSession): GpuExec = {
+    val rootExec = loadObject[GpuExec](pathOfRootPlanMeta(rootPath),
+      spark.sparkContext.hadoopConfiguration)
+
+    checkUnsupportedOperator(rootExec)
+
+    val broadcastHadoopConf = {
+      val sc = spark.sparkContext
+      sc.broadcast(new SerializableConfiguration(spark.sparkContext.hadoopConfiguration))
+    }
+
+    // Load children
+    val newChildren = rootExec.children.zipWithIndex.map { case (plan, idx) =>
+      val newChild = GpuLoreReplayExec(idx, rootPath.toString, broadcastHadoopConf)
+      plan match {
+        case b: GpuBroadcastExchangeExec =>
+          b.withNewChildren(Seq(newChild))
+        case b: BroadcastQueryStageExec =>
+          b.broadcast.withNewChildren(Seq(newChild))
+        case _ => newChild
+      }
+    }
+
+    var nextId = rootExec.children.length
+
+    rootExec.transformExpressionsUp {
+      case sub: ExecSubqueryExpression =>
+        val newSub = restoreSubqueryPlan(nextId, sub, rootPath, broadcastHadoopConf)
+        nextId += 1
+        newSub
+    }.withNewChildren(newChildren).asInstanceOf[GpuExec]
+  }
+
+  private def restoreSubqueryPlan(id: Int, sub: ExecSubqueryExpression,
+      rootPath: Path, hadoopConf: Broadcast[SerializableConfiguration]): ExecSubqueryExpression = {
+    val innerPlan = sub.plan.child
+
+    if (innerPlan.isInstanceOf[GpuExec]) {
+      var newChild: SparkPlan = GpuLoreReplayExec(id, rootPath.toString, hadoopConf)
+
+      if (!innerPlan.supportsColumnar) {
+        newChild = GpuColumnarToRowExec(newChild)
+      }
+      val newSubqueryExec = sub.plan match {
+        case ReusedSubqueryExec(subqueryExec) => subqueryExec.withNewChildren(Seq(newChild))
+          .asInstanceOf[BaseSubqueryExec]
+        case p: BaseSubqueryExec => p.withNewChildren(Seq(newChild))
+          .asInstanceOf[BaseSubqueryExec]
+      }
+      sub.withNewPlan(newSubqueryExec)
+    } else {
+      throw new IllegalArgumentException(s"Subquery plan ${innerPlan.getClass.getSimpleName} " +
+        s"is not a GpuExec")
+    }
+  }
+
+  /**
+   * Lore id generator. Key is [[SQLExecution.EXECUTION_ID_KEY]].
+   */
+  private val idGen: ConcurrentMap[String, AtomicInteger] =
+    new ConcurrentHashMap[String, AtomicInteger]()
+
+  private def nextLoreIdOf(plan: SparkPlan): Option[Int] = {
+    // When the execution id is not set, it means there is no actual execution happening, in this
+    // case we don't need to generate lore id.
+    Option(SparkShimImpl.sessionFromPlan(plan)
+      .sparkContext
+      .getLocalProperty(SQLExecution.EXECUTION_ID_KEY))
+      .map { executionId =>
+        idGen.computeIfAbsent(executionId, _ => new AtomicInteger(0)).getAndIncrement()
+      }
+  }
+
+  def tagForLore(sparkPlan: SparkPlan, rapidsConf: RapidsConf): SparkPlan = {
+    val loreDumpIds = rapidsConf.loreDumpIds
+
+    val newPlan = if (loreDumpIds.nonEmpty) {
+      // We need to dump the output of nodes with the lore id in the dump ids
+      val loreOutputRootPath = rapidsConf.loreDumpPath.getOrElse(throw
+        new IllegalArgumentException(s"${RapidsConf.LORE_DUMP_PATH.key} must be set " +
+          s"when ${RapidsConf.LORE_DUMP_IDS.key} is set."))
+
+      val spark = SparkShimImpl.sessionFromPlan(sparkPlan)
+      val hadoopConf = {
+        val sc = spark.sparkContext
+        sc.broadcast(new SerializableConfiguration(sc.hadoopConfiguration))
+      }
+
+      val subqueries = mutable.Set.empty[SparkPlan]
+
+      sparkPlan.foreachUp {
+        case g: GpuExec =>
+          nextLoreIdOf(g).foreach { loreId =>
+            g.setTagValue(LORE_ID_TAG, loreId.toString)
+
+            loreDumpIds.get(loreId).foreach { outputLoreIds =>
+              checkUnsupportedOperator(g)
+              val currentExecRootPath = new Path(loreOutputRootPath, s"loreId-$loreId")
+              g.setTagValue(LORE_DUMP_PATH_TAG, currentExecRootPath.toString)
+              val loreOutputInfo = LoreOutputInfo(outputLoreIds,
+                currentExecRootPath.toString)
+
+              g.children.zipWithIndex.foreach {
+                case (child, idx) =>
+                  val dumpRDDInfo = LoreDumpRDDInfo(idx, loreOutputInfo, child.output, hadoopConf)
+                  child match {
+                    case c: BroadcastQueryStageExec =>
+                      c.broadcast.setTagValue(LORE_DUMP_RDD_TAG, dumpRDDInfo)
+                    case o => o.setTagValue(LORE_DUMP_RDD_TAG, dumpRDDInfo)
+                  }
+              }
+
+              var nextId = g.children.length
+              g.transformExpressionsUp {
+                case sub: ExecSubqueryExpression =>
+                  if (spark.sessionState.conf.subqueryReuseEnabled) {
+                    if (!subqueries.contains(sub.plan.canonicalized)) {
+                      subqueries += sub.plan.canonicalized
+                    } else {
+                      throw new IllegalArgumentException("Subquery reuse is enabled, and we found" +
+                        " duplicated subqueries, which is currently not supported by LORE.")
+                    }
+                  }
+                  tagSubqueryPlan(nextId, sub, loreOutputInfo, hadoopConf)
+                  nextId += 1
+                  sub
+              }
+            }
+          }
+        case _ =>
+      }
+
+      sparkPlan
+
+    } else {
+      // We don't need to dump the output of the nodes, just tag the lore id
+      sparkPlan.foreachUp {
+        case g: GpuExec =>
+          nextLoreIdOf(g).foreach { loreId =>
+            g.setTagValue(LORE_ID_TAG, loreId.toString)
+          }
+        case _ =>
+      }
+
+      sparkPlan
+    }
+
+    newPlan
+  }
+
+  def loreIdOf(node: SparkPlan): Option[String] = {
+    node.getTagValue(LORE_ID_TAG)
+  }
+
+  private def tagSubqueryPlan(id: Int, sub: ExecSubqueryExpression,
+      loreOutputInfo: LoreOutputInfo, hadoopConf: Broadcast[SerializableConfiguration]) = {
+    val innerPlan = sub.plan.child
+    if (innerPlan.isInstanceOf[GpuExec]) {
+      val dumpRDDInfo = LoreDumpRDDInfo(id, loreOutputInfo, innerPlan.output,
+        hadoopConf)
+      innerPlan match {
+        case p: GpuColumnarToRowExec => p.child.setTagValue(LORE_DUMP_RDD_TAG, dumpRDDInfo)
+        case c => c.setTagValue(LORE_DUMP_RDD_TAG, dumpRDDInfo)
+      }
+    } else {
+      throw new IllegalArgumentException(s"Subquery plan ${innerPlan.getClass.getSimpleName} " +
+        s"is not a GpuExec")
+    }
+  }
+
+  private def checkUnsupportedOperator(plan: SparkPlan): Unit = {
+    if (plan.children.isEmpty ||
+      plan.isInstanceOf[GpuCustomShuffleReaderExec]
+    ) {
+      throw new UnsupportedOperationException(s"Currently we don't support dumping input of " +
+        s"${plan.getClass.getSimpleName} operator.")
+    }
+  }
+}
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/OutputLoreId.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/OutputLoreId.scala
new file mode 100644
index 00000000000..28fa0b2dbbf
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/OutputLoreId.scala
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.lore
+
+import org.apache.hadoop.fs.Path
+
+case class OutputLoreId(loreId: LoreId, partitionIds: Set[Int]) {
+  def outputAllParitions: Boolean = partitionIds.isEmpty
+
+  def shouldOutputPartition(partitionId: Int): Boolean = outputAllParitions ||
+    partitionIds.contains(partitionId)
+}
+
+case class LoreOutputInfo(outputLoreId: OutputLoreId, pathStr: String) {
+  def path: Path = new Path(pathStr)
+}
+
+object OutputLoreId {
+  private val PARTITION_ID_RANGE_REGEX = raw"(\d+)-(\d+)".r("start", "end")
+  private val PARTITION_ID_REGEX = raw"(\d+)".r("partitionId")
+  private val PARTITION_ID_SEP_REGEX = raw" +".r
+
+  private val OUTPUT_LORE_ID_SEP_REGEX = ", *".r
+  private val OUTPUT_LORE_ID_REGEX =
+    raw"(?<loreId>\d+)(\[(?<partitionIds>.*)\])?".r
+
+  def apply(loreId: Int): OutputLoreId = OutputLoreId(loreId, Set.empty)
+
+  def apply(inputStr: String): OutputLoreId = {
+    OUTPUT_LORE_ID_REGEX.findFirstMatchIn(inputStr).map { m =>
+      val loreId = m.group("loreId").toInt
+      val partitionIds: Set[Int] = m.group("partitionIds") match {
+        case partitionIdsStr if partitionIdsStr != null =>
+          PARTITION_ID_SEP_REGEX.split(partitionIdsStr).flatMap {
+            case PARTITION_ID_REGEX(partitionId) =>
+              Seq(partitionId.toInt)
+            case PARTITION_ID_RANGE_REGEX(start, end) =>
+              start.toInt until end.toInt
+            case "*" => Set.empty
+            case partitionIdStr => throw new IllegalArgumentException(s"Invalid partition " +
+              s"id: $partitionIdStr")
+          }.toSet
+        case null => {
+          throw new IllegalArgumentException(s"Invalid output lore id string: $inputStr, " +
+            s"partition ids not found!")
+        }
+      }
+      OutputLoreId(loreId, partitionIds)
+    }.getOrElse(throw new IllegalArgumentException(s"Invalid output lore ids: $inputStr"))
+  }
+
+  def parse(inputStr: String): OutputLoreIds = {
+    require(inputStr != null, "inputStr should not be null")
+
+    OUTPUT_LORE_ID_SEP_REGEX.split(inputStr).map(OutputLoreId(_)).map { outputLoreId =>
+      outputLoreId.loreId -> outputLoreId
+    }.toMap
+  }
+}
+
+
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/dump.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/dump.scala
new file mode 100644
index 00000000000..1b9967e1bf4
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/dump.scala
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.lore
+
+import com.nvidia.spark.rapids.{DumpUtils, GpuColumnVector}
+import com.nvidia.spark.rapids.GpuCoalesceExec.EmptyPartition
+import com.nvidia.spark.rapids.lore.GpuLore.pathOfChild
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.{Partition, SparkContext, TaskContext}
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.rapids.execution.GpuBroadcastHelper
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.vectorized.ColumnarBatch
+import org.apache.spark.util.SerializableConfiguration
+
+
+case class LoreDumpRDDInfo(idxInParent: Int, loreOutputInfo: LoreOutputInfo, attrs: Seq[Attribute],
+    hadoopConf: Broadcast[SerializableConfiguration])
+
+class GpuLoreDumpRDD(info: LoreDumpRDDInfo, input: RDD[ColumnarBatch])
+  extends RDD[ColumnarBatch](input) with GpuLoreRDD {
+  override def rootPath: Path = pathOfChild(info.loreOutputInfo.path, info.idxInParent)
+
+  def saveMeta(): Unit = {
+    val meta = LoreRDDMeta(input.getNumPartitions, this.getPartitions.map(_.index), info.attrs)
+    GpuLore.dumpObject(meta, pathOfMeta, this.context.hadoopConfiguration)
+  }
+
+  override def compute(split: Partition, context: TaskContext): Iterator[ColumnarBatch] = {
+    if (info.loreOutputInfo.outputLoreId.shouldOutputPartition(split.index)) {
+      val originalIter = input.compute(split, context)
+      new Iterator[ColumnarBatch] {
+        var batchIdx: Int = -1
+        var nextBatch: Option[ColumnarBatch] = None
+
+        override def hasNext: Boolean = {
+          if (batchIdx == -1) {
+            loadNextBatch()
+          }
+          nextBatch.isDefined
+        }
+
+        override def next(): ColumnarBatch = {
+          val ret = dumpCurrentBatch()
+          loadNextBatch()
+          if (!hasNext) {
+            // This is the last batch, save the partition meta
+            val partitionMeta = LoreRDDPartitionMeta(batchIdx, GpuColumnVector.extractTypes(ret))
+            GpuLore.dumpObject(partitionMeta, pathOfPartitionMeta(split.index),
+              info.hadoopConf.value.value)
+          }
+          ret
+        }
+
+        private def dumpCurrentBatch(): ColumnarBatch = {
+          val outputPath = pathOfBatch(split.index, batchIdx)
+          val outputStream = outputPath.getFileSystem(info.hadoopConf.value.value)
+            .create(outputPath, false)
+          DumpUtils.dumpToParquet(nextBatch.get, outputStream)
+          nextBatch.get
+        }
+
+        private def loadNextBatch(): Unit = {
+          if (originalIter.hasNext) {
+            nextBatch = Some(originalIter.next())
+          } else {
+            nextBatch = None
+          }
+          batchIdx += 1
+        }
+      }
+    } else {
+      input.compute(split, context)
+    }
+  }
+
+  override protected def getPartitions: Array[Partition] = {
+    input.partitions
+  }
+}
+
+class SimpleRDD(_sc: SparkContext, data: Broadcast[Any], schema: StructType) extends
+  RDD[ColumnarBatch](_sc, Nil) {
+  override def compute(split: Partition, context: TaskContext): Iterator[ColumnarBatch] = {
+    Seq(GpuBroadcastHelper.getBroadcastBatch(data, schema)).iterator
+  }
+
+  override protected def getPartitions: Array[Partition] = Array(EmptyPartition(0))
+}
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/package.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/package.scala
new file mode 100644
index 00000000000..f304ea07d97
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/package.scala
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids
+
+/**
+ * Lore framework is used for dumping input data of a gpu executor to disk so that it can be
+ * replayed in local environment for performance analysis.
+ * <br>
+ * When [[RapidsConf.TAG_LORE_ID_ENABLED]] is set, during the planning phase we will tag a lore
+ * id to each gpu operator. Lore id is guaranteed to be unique within a query, and it's supposed
+ * to be same for operators with same plan.
+ * <br>
+ * When [[RapidsConf.LORE_DUMP_IDS]] is set, during the execution phase we will dump the input
+ * data of gpu operators with lore id to disk. The dumped data can be replayed in local
+ * environment. The dumped data will reside in [[RapidsConf.LORE_DUMP_PATH]]. For more details,
+ * please refer to `docs/dev/lore.md`.
+ */
+package object lore {
+  type LoreId = Int
+  type OutputLoreIds = Map[LoreId, OutputLoreId]
+}
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/replay.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/replay.scala
new file mode 100644
index 00000000000..ffbe207646a
--- /dev/null
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/lore/replay.scala
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.lore
+
+import ai.rapids.cudf.Table
+import com.nvidia.spark.rapids.{GpuColumnVector, GpuExec}
+import com.nvidia.spark.rapids.Arm.withResource
+import org.apache.commons.io.IOUtils
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.{Partition, SparkContext, TaskContext}
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.execution.LeafExecNode
+import org.apache.spark.sql.vectorized.ColumnarBatch
+import org.apache.spark.util.SerializableConfiguration
+
+case class GpuLoreReplayExec(idxInParent: Int, parentRootPath: String,
+    hadoopConf: Broadcast[SerializableConfiguration])
+  extends LeafExecNode
+  with GpuExec {
+  private lazy val rdd = new GpuLoreReplayRDD(sparkSession.sparkContext,
+    GpuLore.pathOfChild(new Path(parentRootPath), idxInParent).toString, hadoopConf)
+  override def output: Seq[Attribute] = rdd.loreRDDMeta.attrs
+
+  override def doExecute(): RDD[InternalRow] = {
+    throw new UnsupportedOperationException("LoreReplayExec does not support row mode")
+  }
+
+  override protected def internalDoExecuteColumnar(): RDD[ColumnarBatch] = {
+    rdd
+  }
+}
+
+class GpuLoreReplayRDD(sc: SparkContext, rootPathStr: String,
+    hadoopConf: Broadcast[SerializableConfiguration])
+  extends RDD[ColumnarBatch](sc, Nil) with GpuLoreRDD {
+
+  override def rootPath: Path = new Path(rootPathStr)
+
+  private[lore] val loreRDDMeta: LoreRDDMeta = GpuLore.loadObject(pathOfMeta, sc
+    .hadoopConfiguration)
+
+  override def compute(split: Partition, context: TaskContext): Iterator[ColumnarBatch] = {
+    val partitionPath = pathOfPartition(split.index)
+    withResource(partitionPath.getFileSystem(hadoopConf.value.value)) { fs =>
+      if (!fs.exists(partitionPath)) {
+        Iterator.empty
+      } else {
+        val partitionMeta = GpuLore.loadObject[LoreRDDPartitionMeta](
+          pathOfPartitionMeta(split.index), hadoopConf.value.value)
+        new Iterator[ColumnarBatch] {
+          private var batchIdx: Int = 0
+
+          override def hasNext: Boolean = {
+            batchIdx < partitionMeta.numBatches
+          }
+
+          override def next(): ColumnarBatch = {
+            val batchPath = pathOfBatch(split.index, batchIdx)
+            val ret = withResource(batchPath.getFileSystem(hadoopConf.value.value)) { fs =>
+              if (!fs.exists(batchPath)) {
+                throw new IllegalStateException(s"Batch file $batchPath does not exist")
+              }
+              withResource(fs.open(batchPath)) { fin =>
+                val buffer = IOUtils.toByteArray(fin)
+                withResource(Table.readParquet(buffer)) { restoredTable =>
+                  GpuColumnVector.from(restoredTable, partitionMeta.dataType.toArray)
+                }
+              }
+
+            }
+            batchIdx += 1
+            ret
+          }
+        }
+      }
+    }
+  }
+
+  override protected def getPartitions: Array[Partition] = {
+    (0 until loreRDDMeta.numPartitions).map(LoreReplayPartition).toArray
+  }
+}
+
+case class LoreReplayPartition(override val index: Int) extends Partition
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala
index 51c6f52d97e..bd30459d63e 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastExchangeExec.scala
@@ -31,6 +31,8 @@ import com.nvidia.spark.rapids._
 import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource}
 import com.nvidia.spark.rapids.GpuMetric._
 import com.nvidia.spark.rapids.RapidsPluginImplicits._
+import com.nvidia.spark.rapids.lore.{GpuLoreDumpRDD, SimpleRDD}
+import com.nvidia.spark.rapids.lore.GpuLore.LORE_DUMP_RDD_TAG
 import com.nvidia.spark.rapids.shims.{ShimBroadcastExchangeLike, ShimUnaryExecNode, SparkShimImpl}
 
 import org.apache.spark.SparkException
@@ -486,7 +488,9 @@ abstract class GpuBroadcastExchangeExecBase(
       throw new IllegalStateException("A canonicalized plan is not supposed to be executed.")
     }
     try {
-      relationFuture.get(timeout, TimeUnit.SECONDS).asInstanceOf[Broadcast[T]]
+      val ret = relationFuture.get(timeout, TimeUnit.SECONDS)
+      doLoreDump(ret)
+      ret.asInstanceOf[Broadcast[T]]
     } catch {
       case ex: TimeoutException =>
         logError(s"Could not execute broadcast in $timeout secs.", ex)
@@ -501,6 +505,18 @@ abstract class GpuBroadcastExchangeExecBase(
     }
   }
 
+  // We have to do this explicitly here rather than similar to the general version one in
+  // [[GpuExec]] since in adaptive execution, the broadcast value has already been calculated
+  // before we tag this plan to dump.
+  private def doLoreDump(result: Broadcast[Any]): Unit = {
+    val inner = new SimpleRDD(session.sparkContext, result, schema)
+    getTagValue(LORE_DUMP_RDD_TAG).foreach { info =>
+      val rdd = new GpuLoreDumpRDD(info, inner)
+      rdd.saveMeta()
+      rdd.foreach(_.close())
+    }
+  }
+
   override def runtimeStatistics: Statistics = {
     Statistics(
       sizeInBytes = metrics("dataSize").value,
diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala
index 7cc94359daa..f1ffcf4df1f 100644
--- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala
+++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala
@@ -157,7 +157,7 @@ case class GpuWriteFilesExec(
         s" mismatch:\n$this")
   }
 
-  override protected def stringArgs: Iterator[Any] = Iterator(child)
+  override def stringArgs: Iterator[Any] = Iterator(child)
 }
 
 object GpuWriteFiles {
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/lore/GpuLoreSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/lore/GpuLoreSuite.scala
new file mode 100644
index 00000000000..7db46718e89
--- /dev/null
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/lore/GpuLoreSuite.scala
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.lore
+
+import com.nvidia.spark.rapids.{FunSuiteWithTempDir, GpuColumnarToRowExec, RapidsConf, SparkQueryCompareTestSuite}
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{functions, DataFrame, SparkSession}
+import org.apache.spark.sql.internal.SQLConf
+
+class GpuLoreSuite extends SparkQueryCompareTestSuite with FunSuiteWithTempDir with Logging {
+  test("Aggregate") {
+    doTestReplay("10[*]") { spark =>
+      spark.range(0, 1000, 1, 100)
+        .selectExpr("id % 10 as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("total"))
+    }
+  }
+
+  test("Broadcast join") {
+    doTestReplay("32[*]") { spark =>
+      val df1 = spark.range(0, 1000, 1, 10)
+        .selectExpr("id % 10 as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("count"))
+
+      val df2 = spark.range(0, 1000, 1, 10)
+        .selectExpr("(id % 10 + 5) as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("count"))
+
+      df1.join(df2, Seq("key"))
+    }
+  }
+
+  test("Subquery Filter") {
+    doTestReplay("13[*]") { spark =>
+      spark.range(0, 100, 1, 10)
+        .createTempView("df1")
+
+      spark.range(50, 1000, 1, 10)
+        .createTempView("df2")
+
+      spark.sql("select * from df1 where id > (select max(id) from df2)")
+    }
+  }
+
+  test("Subquery in projection") {
+    doTestReplay("11[*]") { spark =>
+      spark.sql(
+        """
+          |CREATE TEMPORARY VIEW t1
+          |AS SELECT * FROM VALUES
+          |(1, "a"),
+          |(2, "a"),
+          |(3, "a") t(id, value)
+          |""".stripMargin)
+
+      spark.sql(
+        """
+          |SELECT *, (SELECT COUNT(*) FROM t1) FROM t1
+          |""".stripMargin)
+    }
+  }
+
+  test("No broadcast join") {
+    doTestReplay("30[*]") { spark =>
+      spark.conf.set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1")
+
+      val df1 = spark.range(0, 1000, 1, 10)
+        .selectExpr("id % 10 as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("count"))
+
+      val df2 = spark.range(0, 1000, 1, 10)
+        .selectExpr("(id % 10 + 5) as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("count"))
+
+      df1.join(df2, Seq("key"))
+    }
+  }
+
+  test("AQE broadcast") {
+    doTestReplay("90[*]") { spark =>
+      spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true")
+
+      val df1 = spark.range(0, 1000, 1, 10)
+        .selectExpr("id % 10 as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("count"))
+
+      val df2 = spark.range(0, 1000, 1, 10)
+        .selectExpr("(id % 10 + 5) as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("count"))
+
+      df1.join(df2, Seq("key"))
+    }
+  }
+
+  test("AQE Exchange") {
+    doTestReplay("28[*]") { spark =>
+      spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true")
+
+      spark.range(0, 1000, 1, 100)
+        .selectExpr("id % 10 as key", "id % 100 as value")
+        .groupBy("key")
+        .agg(functions.sum("value").as("total"))
+    }
+  }
+
+  test("Partition only") {
+    withGpuSparkSession{ spark =>
+      spark.conf.set(RapidsConf.LORE_DUMP_PATH.key, TEST_FILES_ROOT.getAbsolutePath)
+      spark.conf.set(RapidsConf.LORE_DUMP_IDS.key, "3[0 2]")
+
+      val df = spark.range(0, 1000, 1, 100)
+        .selectExpr("id % 10 as key", "id % 100 as value")
+
+      val res = df.collect().length
+      println(s"Length of original: $res")
+
+
+      val restoredRes = GpuColumnarToRowExec(GpuLore.restoreGpuExec(
+        new Path(s"${TEST_FILES_ROOT.getAbsolutePath}/loreId-3"), spark))
+        .executeCollect()
+        .length
+
+      assert(20 == restoredRes)
+    }
+  }
+
+  private def doTestReplay(loreDumpIds: String)(dfFunc: SparkSession => DataFrame) = {
+    val loreId = OutputLoreId.parse(loreDumpIds).head._1
+    withGpuSparkSession { spark =>
+      spark.conf.set(RapidsConf.LORE_DUMP_PATH.key, TEST_FILES_ROOT.getAbsolutePath)
+      spark.conf.set(RapidsConf.LORE_DUMP_IDS.key, loreDumpIds)
+
+      val df = dfFunc(spark)
+
+      val expectedLength = df.collect().length
+
+      val restoredResultLength = GpuColumnarToRowExec(GpuLore.restoreGpuExec(
+        new Path(s"${TEST_FILES_ROOT.getAbsolutePath}/loreId-$loreId"),
+        spark))
+        .executeCollect()
+        .length
+
+      assert(expectedLength == restoredResultLength)
+    }
+  }
+}
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/lore/OutputLoreIdSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/lore/OutputLoreIdSuite.scala
new file mode 100644
index 00000000000..aad3d997b9d
--- /dev/null
+++ b/tests/src/test/scala/com/nvidia/spark/rapids/lore/OutputLoreIdSuite.scala
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.lore
+
+import org.scalatest.funsuite.AnyFunSuite
+
+class OutputLoreIdSuite extends AnyFunSuite {
+  test("Parse one output lore id") {
+    val expectedLoreIds = Map(1 -> OutputLoreId(1, Set(1, 2, 4, 8)))
+    val loreIds = OutputLoreId.parse("1[1 2 4 8]")
+
+    assert(loreIds == expectedLoreIds)
+  }
+
+  test("Parse multi output lore id") {
+    val expectedLoreIds = Map(
+      1 -> OutputLoreId(1, Set(1, 2, 4, 8)),
+      2 -> OutputLoreId(2, Set(1, 4, 5, 6, 7, 8, 100))
+    )
+    val loreIds = OutputLoreId.parse("1[1 2 4 8], 2[1 4-9 100]")
+
+    assert(loreIds == expectedLoreIds)
+  }
+
+  test("Parse empty output lore id should fail") {
+    assertThrows[IllegalArgumentException] {
+      OutputLoreId.parse(" 1, 2 ")
+    }
+  }
+
+  test("Parse mixed") {
+    val expectedLoreIds = Map(
+      1 -> OutputLoreId(1),
+      2 -> OutputLoreId(2, Set(4, 5, 8)),
+      3 -> OutputLoreId(3, Set(1, 2, 4, 8))
+    )
+    val loreIds = OutputLoreId.parse("1[*], 2[4-6 8] , 3[1 2 4 8]")
+
+    assert(loreIds == expectedLoreIds)
+  }
+}

From 77ee0ab94ed63b9c2ff58a6d3254f0d788d59dfc Mon Sep 17 00:00:00 2001
From: Firestarman <firestarmanllc@gmail.com>
Date: Fri, 28 Jun 2024 05:04:04 +0000
Subject: [PATCH 4/4] Support minBy on GPU

Signed-off-by: Firestarman <firestarmanllc@gmail.com>
---
 .../advanced_configs.md                       |   1 +
 docs/supported_ops.md                         | 262 ++++++++++++++----
 .../src/main/python/hash_aggregate_test.py    |  14 +
 .../nvidia/spark/rapids/GpuOverrides.scala    |  27 ++
 .../rapids/aggregate/aggregateFunctions.scala | 116 ++++++++
 tools/generated_files/311/operatorsScore.csv  |   1 +
 tools/generated_files/311/supportedExprs.csv  |   6 +
 tools/generated_files/312/operatorsScore.csv  |   1 +
 tools/generated_files/312/supportedExprs.csv  |   6 +
 tools/generated_files/313/operatorsScore.csv  |   1 +
 tools/generated_files/313/supportedExprs.csv  |   6 +
 tools/generated_files/320/operatorsScore.csv  |   1 +
 tools/generated_files/320/supportedExprs.csv  |   6 +
 tools/generated_files/321/operatorsScore.csv  |   1 +
 tools/generated_files/321/supportedExprs.csv  |   6 +
 .../generated_files/321cdh/operatorsScore.csv |   1 +
 .../generated_files/321cdh/supportedExprs.csv |   6 +
 tools/generated_files/322/operatorsScore.csv  |   1 +
 tools/generated_files/322/supportedExprs.csv  |   6 +
 tools/generated_files/323/operatorsScore.csv  |   1 +
 tools/generated_files/323/supportedExprs.csv  |   6 +
 tools/generated_files/324/operatorsScore.csv  |   1 +
 tools/generated_files/324/supportedExprs.csv  |   6 +
 tools/generated_files/330/operatorsScore.csv  |   1 +
 tools/generated_files/330/supportedExprs.csv  |   6 +
 .../generated_files/330cdh/operatorsScore.csv |   1 +
 .../generated_files/330cdh/supportedExprs.csv |   6 +
 tools/generated_files/331/operatorsScore.csv  |   1 +
 tools/generated_files/331/supportedExprs.csv  |   6 +
 tools/generated_files/332/operatorsScore.csv  |   1 +
 tools/generated_files/332/supportedExprs.csv  |   6 +
 .../generated_files/332cdh/operatorsScore.csv |   1 +
 .../generated_files/332cdh/supportedExprs.csv |   6 +
 tools/generated_files/333/operatorsScore.csv  |   1 +
 tools/generated_files/333/supportedExprs.csv  |   6 +
 tools/generated_files/334/operatorsScore.csv  |   1 +
 tools/generated_files/334/supportedExprs.csv  |   6 +
 tools/generated_files/340/operatorsScore.csv  |   1 +
 tools/generated_files/340/supportedExprs.csv  |   6 +
 tools/generated_files/341/operatorsScore.csv  |   1 +
 tools/generated_files/341/supportedExprs.csv  |   6 +
 tools/generated_files/342/operatorsScore.csv  |   1 +
 tools/generated_files/342/supportedExprs.csv  |   6 +
 tools/generated_files/343/operatorsScore.csv  |   1 +
 tools/generated_files/343/supportedExprs.csv  |   6 +
 tools/generated_files/350/operatorsScore.csv  |   1 +
 tools/generated_files/350/supportedExprs.csv  |   6 +
 tools/generated_files/351/operatorsScore.csv  |   1 +
 tools/generated_files/351/supportedExprs.csv  |   6 +
 tools/generated_files/operatorsScore.csv      |   1 +
 tools/generated_files/supportedExprs.csv      |   6 +
 51 files changed, 529 insertions(+), 52 deletions(-)

diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md
index f5d511cbbc5..6694aeb0e9e 100644
--- a/docs/additional-functionality/advanced_configs.md
+++ b/docs/additional-functionality/advanced_configs.md
@@ -406,6 +406,7 @@ Name | SQL Function(s) | Description | Default Value | Notes
 <a name="sql.expression.Last"></a>spark.rapids.sql.expression.Last|`last_value`, `last`|last aggregate operator|true|None|
 <a name="sql.expression.Max"></a>spark.rapids.sql.expression.Max|`max`|Max aggregate operator|true|None|
 <a name="sql.expression.Min"></a>spark.rapids.sql.expression.Min|`min`|Min aggregate operator|true|None|
+<a name="sql.expression.MinBy"></a>spark.rapids.sql.expression.MinBy|`min_by`|MinBy aggregate operator. It may produce different results than CPU when multiple rows in a group have same minimum value in the ordering column and different associated values in the value column.|true|None|
 <a name="sql.expression.Percentile"></a>spark.rapids.sql.expression.Percentile|`percentile`|Aggregation computing exact percentile|true|None|
 <a name="sql.expression.PivotFirst"></a>spark.rapids.sql.expression.PivotFirst| |PivotFirst operator|true|None|
 <a name="sql.expression.StddevPop"></a>spark.rapids.sql.expression.StddevPop|`stddev_pop`|Aggregation computing population standard deviation|true|None|
diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index fbafcfbf81d..57693b55623 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -18090,6 +18090,138 @@ are limited.
 <th>UDT</th>
 </tr>
 <tr>
+<td rowSpan="6">MinBy</td>
+<td rowSpan="6">`min_by`</td>
+<td rowSpan="6">MinBy aggregate operator. It may produce different results than CPU when multiple rows in a group have same minimum value in the ordering column and different associated values in the value column.</td>
+<td rowSpan="6">None</td>
+<td rowSpan="3">aggregation</td>
+<td>value</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>ordering</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
+<td> </td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td rowSpan="3">reduction</td>
+<td>value</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>ordering</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><b>NS</b></td>
+<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
+<td> </td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
+<td>result</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td>S</td>
+<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types CALENDAR, UDT</em></td>
+<td><b>NS</b></td>
+</tr>
+<tr>
 <td rowSpan="8">Percentile</td>
 <td rowSpan="8">`percentile`</td>
 <td rowSpan="8">Aggregation computing exact percentile</td>
@@ -18396,6 +18528,32 @@ are limited.
 <td><b>NS</b></td>
 </tr>
 <tr>
+<th>Expression</th>
+<th>SQL Functions(s)</th>
+<th>Description</th>
+<th>Notes</th>
+<th>Context</th>
+<th>Param/Output</th>
+<th>BOOLEAN</th>
+<th>BYTE</th>
+<th>SHORT</th>
+<th>INT</th>
+<th>LONG</th>
+<th>FLOAT</th>
+<th>DOUBLE</th>
+<th>DATE</th>
+<th>TIMESTAMP</th>
+<th>STRING</th>
+<th>DECIMAL</th>
+<th>NULL</th>
+<th>BINARY</th>
+<th>CALENDAR</th>
+<th>ARRAY</th>
+<th>MAP</th>
+<th>STRUCT</th>
+<th>UDT</th>
+</tr>
+<tr>
 <td rowSpan="6">StddevPop</td>
 <td rowSpan="6">`stddev_pop`</td>
 <td rowSpan="6">Aggregation computing population standard deviation</td>
@@ -18529,32 +18687,6 @@ are limited.
 <td> </td>
 </tr>
 <tr>
-<th>Expression</th>
-<th>SQL Functions(s)</th>
-<th>Description</th>
-<th>Notes</th>
-<th>Context</th>
-<th>Param/Output</th>
-<th>BOOLEAN</th>
-<th>BYTE</th>
-<th>SHORT</th>
-<th>INT</th>
-<th>LONG</th>
-<th>FLOAT</th>
-<th>DOUBLE</th>
-<th>DATE</th>
-<th>TIMESTAMP</th>
-<th>STRING</th>
-<th>DECIMAL</th>
-<th>NULL</th>
-<th>BINARY</th>
-<th>CALENDAR</th>
-<th>ARRAY</th>
-<th>MAP</th>
-<th>STRUCT</th>
-<th>UDT</th>
-</tr>
-<tr>
 <td rowSpan="6">StddevSamp</td>
 <td rowSpan="6">`std`, `stddev_samp`, `stddev`</td>
 <td rowSpan="6">Aggregation computing sample standard deviation</td>
@@ -18821,6 +18953,32 @@ are limited.
 <td> </td>
 </tr>
 <tr>
+<th>Expression</th>
+<th>SQL Functions(s)</th>
+<th>Description</th>
+<th>Notes</th>
+<th>Context</th>
+<th>Param/Output</th>
+<th>BOOLEAN</th>
+<th>BYTE</th>
+<th>SHORT</th>
+<th>INT</th>
+<th>LONG</th>
+<th>FLOAT</th>
+<th>DOUBLE</th>
+<th>DATE</th>
+<th>TIMESTAMP</th>
+<th>STRING</th>
+<th>DECIMAL</th>
+<th>NULL</th>
+<th>BINARY</th>
+<th>CALENDAR</th>
+<th>ARRAY</th>
+<th>MAP</th>
+<th>STRUCT</th>
+<th>UDT</th>
+</tr>
+<tr>
 <td rowSpan="6">VariancePop</td>
 <td rowSpan="6">`var_pop`</td>
 <td rowSpan="6">Aggregation computing population variance</td>
@@ -18954,32 +19112,6 @@ are limited.
 <td> </td>
 </tr>
 <tr>
-<th>Expression</th>
-<th>SQL Functions(s)</th>
-<th>Description</th>
-<th>Notes</th>
-<th>Context</th>
-<th>Param/Output</th>
-<th>BOOLEAN</th>
-<th>BYTE</th>
-<th>SHORT</th>
-<th>INT</th>
-<th>LONG</th>
-<th>FLOAT</th>
-<th>DOUBLE</th>
-<th>DATE</th>
-<th>TIMESTAMP</th>
-<th>STRING</th>
-<th>DECIMAL</th>
-<th>NULL</th>
-<th>BINARY</th>
-<th>CALENDAR</th>
-<th>ARRAY</th>
-<th>MAP</th>
-<th>STRUCT</th>
-<th>UDT</th>
-</tr>
-<tr>
 <td rowSpan="6">VarianceSamp</td>
 <td rowSpan="6">`var_samp`, `variance`</td>
 <td rowSpan="6">Aggregation computing sample variance</td>
@@ -19186,6 +19318,32 @@ are limited.
 <td><b>NS</b></td>
 </tr>
 <tr>
+<th>Expression</th>
+<th>SQL Functions(s)</th>
+<th>Description</th>
+<th>Notes</th>
+<th>Context</th>
+<th>Param/Output</th>
+<th>BOOLEAN</th>
+<th>BYTE</th>
+<th>SHORT</th>
+<th>INT</th>
+<th>LONG</th>
+<th>FLOAT</th>
+<th>DOUBLE</th>
+<th>DATE</th>
+<th>TIMESTAMP</th>
+<th>STRING</th>
+<th>DECIMAL</th>
+<th>NULL</th>
+<th>BINARY</th>
+<th>CALENDAR</th>
+<th>ARRAY</th>
+<th>MAP</th>
+<th>STRUCT</th>
+<th>UDT</th>
+</tr>
+<tr>
 <td rowSpan="2">HiveGenericUDF</td>
 <td rowSpan="2"> </td>
 <td rowSpan="2">Hive Generic UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance</td>
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index d1cd70aa43c..796865bc376 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1276,6 +1276,20 @@ def test_generic_reductions(data_gen):
             'count(1)'),
         conf=local_conf)
 
+@ignore_order(local=True)
+def test_hash_groupby_with_minby():
+    assert_gpu_and_cpu_are_equal_collect(
+        lambda spark: three_col_df(spark, int_gen, int_gen, UniqueLongGen())
+            .groupby('a').agg(f.min_by('b', 'c'))
+    )
+
+@ignore_order(local=True)
+def test_reduction_with_minby():
+    assert_gpu_and_cpu_are_equal_collect(
+        lambda spark: two_col_df(spark, int_gen, UniqueLongGen()).selectExpr(
+            "min_by(a, b)")
+    )
+
 @pytest.mark.parametrize('data_gen', all_gen + _nested_gens, ids=idfn)
 @allow_non_gpu(*non_utc_allow)
 def test_count(data_gen):
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index 73475ef36f5..d0d525090fe 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -2265,6 +2265,33 @@ object GpuOverrides extends Logging {
         // Last does not overflow, so it doesn't need the ANSI check
         override val needsAnsiCheck: Boolean = false
       }),
+    expr[MinBy](
+      "MinBy aggregate operator. It may produce different results than CPU when " +
+        "multiple rows in a group have same minimum value in the ordering column and " +
+        "different associated values in the value column.",
+      ExprChecks.reductionAndGroupByAgg(
+        (TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP + TypeSig.BINARY +
+          TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128).nested(),
+        TypeSig.all,
+        Seq(
+          ParamCheck("value", (TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP + TypeSig.BINARY
+            + TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128).nested(),
+            TypeSig.all),
+          ParamCheck("ordering", (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL
+            + TypeSig.STRUCT + TypeSig.ARRAY).nested(),
+            TypeSig.orderable))
+      ),
+      (minBy, conf, p, r) => new AggExprMeta[MinBy](minBy, conf, p, r) {
+
+        override def convertToGpu(childExprs: Seq[Expression]): GpuExpression = {
+          // Only two children (value expression, ordering expression)
+          require(childExprs.length == 2)
+          GpuMinBy(childExprs.head, childExprs.last)
+        }
+
+        // MinBy does not overflow, so it doesn't need the ANSI check
+        override val needsAnsiCheck: Boolean = false
+      }),
     expr[BRound](
       "Round an expression to d decimal places using HALF_EVEN rounding mode",
       ExprChecks.binaryProject(
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala
index 2a0c2176109..0c14bba7055 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala
@@ -2023,3 +2023,119 @@ case class GpuVarianceSamp(child: Expression, nullOnDivideByZero: Boolean)
 
   override def prettyName: String = "var_samp"
 }
+
+object CudfMaxMinBy {
+  val KEY_VALUE: String = "_key_value"
+  val KEY_ORDERING: String = "_key_ordering"
+}
+
+abstract class CudfMaxMinByAggregate(
+    valueType: DataType,
+    orderingType: DataType) extends CudfAggregate {
+
+  protected val sortOrder: Int => cudf.OrderByArg
+
+  // This is a short term solution. and better to have a dedicate reduction for this.
+  override lazy val reductionAggregate: cudf.ColumnVector => cudf.Scalar = col => {
+    val orderCol = col.getChildColumnView(1).copyToColumnVector()
+    val tmpTable = withResource(orderCol)(_ =>new cudf.Table(orderCol, col))
+    val sorted = withResource(tmpTable) { _ =>
+      // columns in table [order, original struct]
+      tmpTable.orderBy(sortOrder(0))
+    }
+    withResource(sorted) { _ =>
+      sorted.getColumn(1).reduce(ReductionAggregation.nth(0, NullPolicy.INCLUDE))
+    }
+  }
+
+  override val dataType: DataType = StructType(Seq(
+    StructField(CudfMaxMinBy.KEY_VALUE, valueType),
+    StructField(CudfMaxMinBy.KEY_ORDERING, orderingType)))
+}
+
+class CudfMaxBy(valueType: DataType, orderingType: DataType)
+  extends CudfMaxMinByAggregate(valueType, orderingType) {
+
+  override val name: String = "CudfMaxBy"
+  override lazy val sortOrder: Int => cudf.OrderByArg =
+    i => cudf.OrderByArg.desc(i, true)
+  // TODO
+  override lazy val groupByAggregate: GroupByAggregation = null
+}
+
+class CudfMinBy(valueType: DataType, orderingType: DataType)
+  extends CudfMaxMinByAggregate(valueType, orderingType) {
+
+  override val name: String = "CudfMinBy"
+  override lazy val sortOrder: Int => cudf.OrderByArg =
+    i => cudf.OrderByArg.asc(i, false)
+  override lazy val groupByAggregate: GroupByAggregation = GroupByAggregation.minBy()
+}
+
+abstract class GpuMaxMinByBase(valueExpr: Expression, orderingExpr: Expression)
+  extends GpuAggregateFunction with Serializable {
+
+  protected val cudfMaxMinByAggregate: CudfAggregate
+
+  private lazy val bufferValue: AttributeReference =
+    AttributeReference("value", valueExpr.dataType)()
+
+  private lazy val bufferOrdering: AttributeReference =
+    AttributeReference("ordering", orderingExpr.dataType)()
+
+  // Cudf allows only one column as input, so wrap value and ordering columns by
+  // a struct before just going into cuDF.
+  private def createStructExpression(value: Expression, order: Expression): Expression =
+    GpuCreateNamedStruct(Seq(
+      GpuLiteral(CudfMaxMinBy.KEY_VALUE, StringType), value,
+      GpuLiteral(CudfMaxMinBy.KEY_ORDERING, StringType), order))
+
+  // Extract the value and ordering columns from cuDF results
+  // to match the expectation of Spark.
+  private def extractChildren: Seq[Expression] = Seq(
+    GpuGetStructField(cudfMaxMinByAggregate.attr, 0, Some(CudfMaxMinBy.KEY_VALUE)),
+    GpuGetStructField(cudfMaxMinByAggregate.attr, 1, Some(CudfMaxMinBy.KEY_ORDERING))
+  )
+
+  override lazy val initialValues: Seq[Expression] = Seq(
+    GpuLiteral(null, valueExpr.dataType), GpuLiteral(null, orderingExpr.dataType))
+
+  override lazy val inputProjection: Seq[Expression] = Seq(
+    createStructExpression(valueExpr, orderingExpr))
+  override lazy val updateAggregates: Seq[CudfAggregate] = Seq(cudfMaxMinByAggregate)
+  override lazy val postUpdate: Seq[Expression] = extractChildren
+
+  override lazy val preMerge: Seq[Expression] = Seq(
+    createStructExpression(bufferValue, bufferOrdering))
+  override lazy val mergeAggregates: Seq[CudfAggregate] = Seq(cudfMaxMinByAggregate)
+  override lazy val postMerge: Seq[Expression] = extractChildren
+
+  override lazy val evaluateExpression: Expression = bufferValue
+
+  override def aggBufferAttributes: Seq[AttributeReference] = Seq(bufferValue, bufferOrdering)
+
+  override def children: Seq[Expression] = Seq(valueExpr, orderingExpr)
+
+  override def nullable: Boolean = true
+
+  // Return data type.
+  override def dataType: DataType = valueExpr.dataType
+}
+
+case class GpuMaxBy(valueExpr: Expression, orderingExpr: Expression)
+  extends GpuMaxMinByBase(valueExpr, orderingExpr) {
+
+  override def prettyName: String = "max_by"
+
+  override protected lazy val cudfMaxMinByAggregate: CudfAggregate =
+    new CudfMaxBy(valueExpr.dataType, orderingExpr.dataType)
+}
+
+case class GpuMinBy(valueExpr: Expression, orderingExpr: Expression)
+  extends GpuMaxMinByBase(valueExpr, orderingExpr) {
+
+  override def prettyName: String = "min_by"
+
+  override protected lazy val cudfMaxMinByAggregate: CudfAggregate =
+    new CudfMinBy(valueExpr.dataType, orderingExpr.dataType)
+}
diff --git a/tools/generated_files/311/operatorsScore.csv b/tools/generated_files/311/operatorsScore.csv
index e3f8d1053c1..2bb696b4d87 100644
--- a/tools/generated_files/311/operatorsScore.csv
+++ b/tools/generated_files/311/operatorsScore.csv
@@ -172,6 +172,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/311/supportedExprs.csv b/tools/generated_files/311/supportedExprs.csv
index 5f57725522f..1e590202a05 100644
--- a/tools/generated_files/311/supportedExprs.csv
+++ b/tools/generated_files/311/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
diff --git a/tools/generated_files/312/operatorsScore.csv b/tools/generated_files/312/operatorsScore.csv
index e3f8d1053c1..2bb696b4d87 100644
--- a/tools/generated_files/312/operatorsScore.csv
+++ b/tools/generated_files/312/operatorsScore.csv
@@ -172,6 +172,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/312/supportedExprs.csv b/tools/generated_files/312/supportedExprs.csv
index 5f57725522f..1e590202a05 100644
--- a/tools/generated_files/312/supportedExprs.csv
+++ b/tools/generated_files/312/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
diff --git a/tools/generated_files/313/operatorsScore.csv b/tools/generated_files/313/operatorsScore.csv
index e3f8d1053c1..2bb696b4d87 100644
--- a/tools/generated_files/313/operatorsScore.csv
+++ b/tools/generated_files/313/operatorsScore.csv
@@ -172,6 +172,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/313/supportedExprs.csv b/tools/generated_files/313/supportedExprs.csv
index 5f57725522f..1e590202a05 100644
--- a/tools/generated_files/313/supportedExprs.csv
+++ b/tools/generated_files/313/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
diff --git a/tools/generated_files/320/operatorsScore.csv b/tools/generated_files/320/operatorsScore.csv
index a9606fe77d9..7236ad2922c 100644
--- a/tools/generated_files/320/operatorsScore.csv
+++ b/tools/generated_files/320/operatorsScore.csv
@@ -176,6 +176,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/320/supportedExprs.csv b/tools/generated_files/320/supportedExprs.csv
index 937ed7ae569..47b47f90b58 100644
--- a/tools/generated_files/320/supportedExprs.csv
+++ b/tools/generated_files/320/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/321/operatorsScore.csv b/tools/generated_files/321/operatorsScore.csv
index a9606fe77d9..7236ad2922c 100644
--- a/tools/generated_files/321/operatorsScore.csv
+++ b/tools/generated_files/321/operatorsScore.csv
@@ -176,6 +176,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/321/supportedExprs.csv b/tools/generated_files/321/supportedExprs.csv
index 937ed7ae569..47b47f90b58 100644
--- a/tools/generated_files/321/supportedExprs.csv
+++ b/tools/generated_files/321/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/321cdh/operatorsScore.csv b/tools/generated_files/321cdh/operatorsScore.csv
index a9606fe77d9..7236ad2922c 100644
--- a/tools/generated_files/321cdh/operatorsScore.csv
+++ b/tools/generated_files/321cdh/operatorsScore.csv
@@ -176,6 +176,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/321cdh/supportedExprs.csv b/tools/generated_files/321cdh/supportedExprs.csv
index 937ed7ae569..47b47f90b58 100644
--- a/tools/generated_files/321cdh/supportedExprs.csv
+++ b/tools/generated_files/321cdh/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/322/operatorsScore.csv b/tools/generated_files/322/operatorsScore.csv
index a9606fe77d9..7236ad2922c 100644
--- a/tools/generated_files/322/operatorsScore.csv
+++ b/tools/generated_files/322/operatorsScore.csv
@@ -176,6 +176,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/322/supportedExprs.csv b/tools/generated_files/322/supportedExprs.csv
index 937ed7ae569..47b47f90b58 100644
--- a/tools/generated_files/322/supportedExprs.csv
+++ b/tools/generated_files/322/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/323/operatorsScore.csv b/tools/generated_files/323/operatorsScore.csv
index a9606fe77d9..7236ad2922c 100644
--- a/tools/generated_files/323/operatorsScore.csv
+++ b/tools/generated_files/323/operatorsScore.csv
@@ -176,6 +176,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/323/supportedExprs.csv b/tools/generated_files/323/supportedExprs.csv
index 937ed7ae569..47b47f90b58 100644
--- a/tools/generated_files/323/supportedExprs.csv
+++ b/tools/generated_files/323/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/324/operatorsScore.csv b/tools/generated_files/324/operatorsScore.csv
index a9606fe77d9..7236ad2922c 100644
--- a/tools/generated_files/324/operatorsScore.csv
+++ b/tools/generated_files/324/operatorsScore.csv
@@ -176,6 +176,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/324/supportedExprs.csv b/tools/generated_files/324/supportedExprs.csv
index 937ed7ae569..47b47f90b58 100644
--- a/tools/generated_files/324/supportedExprs.csv
+++ b/tools/generated_files/324/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv
index 7de435ebbc7..c41c008ca34 100644
--- a/tools/generated_files/330/operatorsScore.csv
+++ b/tools/generated_files/330/operatorsScore.csv
@@ -182,6 +182,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv
index ce504a2ca68..bc6c908f47c 100644
--- a/tools/generated_files/330/supportedExprs.csv
+++ b/tools/generated_files/330/supportedExprs.csv
@@ -712,6 +712,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/330cdh/operatorsScore.csv b/tools/generated_files/330cdh/operatorsScore.csv
index 7de435ebbc7..c41c008ca34 100644
--- a/tools/generated_files/330cdh/operatorsScore.csv
+++ b/tools/generated_files/330cdh/operatorsScore.csv
@@ -182,6 +182,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/330cdh/supportedExprs.csv b/tools/generated_files/330cdh/supportedExprs.csv
index ce504a2ca68..bc6c908f47c 100644
--- a/tools/generated_files/330cdh/supportedExprs.csv
+++ b/tools/generated_files/330cdh/supportedExprs.csv
@@ -712,6 +712,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/331/operatorsScore.csv b/tools/generated_files/331/operatorsScore.csv
index b1168d0fa6b..496dfa28eaf 100644
--- a/tools/generated_files/331/operatorsScore.csv
+++ b/tools/generated_files/331/operatorsScore.csv
@@ -183,6 +183,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/331/supportedExprs.csv b/tools/generated_files/331/supportedExprs.csv
index 44a7a8b977c..e1d3f85e0e3 100644
--- a/tools/generated_files/331/supportedExprs.csv
+++ b/tools/generated_files/331/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/332/operatorsScore.csv b/tools/generated_files/332/operatorsScore.csv
index b1168d0fa6b..496dfa28eaf 100644
--- a/tools/generated_files/332/operatorsScore.csv
+++ b/tools/generated_files/332/operatorsScore.csv
@@ -183,6 +183,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/332/supportedExprs.csv b/tools/generated_files/332/supportedExprs.csv
index 44a7a8b977c..e1d3f85e0e3 100644
--- a/tools/generated_files/332/supportedExprs.csv
+++ b/tools/generated_files/332/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/332cdh/operatorsScore.csv b/tools/generated_files/332cdh/operatorsScore.csv
index b1168d0fa6b..496dfa28eaf 100644
--- a/tools/generated_files/332cdh/operatorsScore.csv
+++ b/tools/generated_files/332cdh/operatorsScore.csv
@@ -183,6 +183,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/332cdh/supportedExprs.csv b/tools/generated_files/332cdh/supportedExprs.csv
index 44a7a8b977c..e1d3f85e0e3 100644
--- a/tools/generated_files/332cdh/supportedExprs.csv
+++ b/tools/generated_files/332cdh/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/333/operatorsScore.csv b/tools/generated_files/333/operatorsScore.csv
index b1168d0fa6b..496dfa28eaf 100644
--- a/tools/generated_files/333/operatorsScore.csv
+++ b/tools/generated_files/333/operatorsScore.csv
@@ -183,6 +183,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/333/supportedExprs.csv b/tools/generated_files/333/supportedExprs.csv
index 44a7a8b977c..e1d3f85e0e3 100644
--- a/tools/generated_files/333/supportedExprs.csv
+++ b/tools/generated_files/333/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/334/operatorsScore.csv b/tools/generated_files/334/operatorsScore.csv
index b1168d0fa6b..496dfa28eaf 100644
--- a/tools/generated_files/334/operatorsScore.csv
+++ b/tools/generated_files/334/operatorsScore.csv
@@ -183,6 +183,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/334/supportedExprs.csv b/tools/generated_files/334/supportedExprs.csv
index 44a7a8b977c..e1d3f85e0e3 100644
--- a/tools/generated_files/334/supportedExprs.csv
+++ b/tools/generated_files/334/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/340/operatorsScore.csv b/tools/generated_files/340/operatorsScore.csv
index 161fcc90e7b..23e5c0979eb 100644
--- a/tools/generated_files/340/operatorsScore.csv
+++ b/tools/generated_files/340/operatorsScore.csv
@@ -185,6 +185,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/340/supportedExprs.csv b/tools/generated_files/340/supportedExprs.csv
index 63bfecc4ce3..88a7674c867 100644
--- a/tools/generated_files/340/supportedExprs.csv
+++ b/tools/generated_files/340/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/341/operatorsScore.csv b/tools/generated_files/341/operatorsScore.csv
index 161fcc90e7b..23e5c0979eb 100644
--- a/tools/generated_files/341/operatorsScore.csv
+++ b/tools/generated_files/341/operatorsScore.csv
@@ -185,6 +185,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/341/supportedExprs.csv b/tools/generated_files/341/supportedExprs.csv
index 63bfecc4ce3..88a7674c867 100644
--- a/tools/generated_files/341/supportedExprs.csv
+++ b/tools/generated_files/341/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/342/operatorsScore.csv b/tools/generated_files/342/operatorsScore.csv
index 161fcc90e7b..23e5c0979eb 100644
--- a/tools/generated_files/342/operatorsScore.csv
+++ b/tools/generated_files/342/operatorsScore.csv
@@ -185,6 +185,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/342/supportedExprs.csv b/tools/generated_files/342/supportedExprs.csv
index 63bfecc4ce3..88a7674c867 100644
--- a/tools/generated_files/342/supportedExprs.csv
+++ b/tools/generated_files/342/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/343/operatorsScore.csv b/tools/generated_files/343/operatorsScore.csv
index 161fcc90e7b..23e5c0979eb 100644
--- a/tools/generated_files/343/operatorsScore.csv
+++ b/tools/generated_files/343/operatorsScore.csv
@@ -185,6 +185,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/343/supportedExprs.csv b/tools/generated_files/343/supportedExprs.csv
index 63bfecc4ce3..88a7674c867 100644
--- a/tools/generated_files/343/supportedExprs.csv
+++ b/tools/generated_files/343/supportedExprs.csv
@@ -714,6 +714,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/350/operatorsScore.csv b/tools/generated_files/350/operatorsScore.csv
index d98c632ac68..946bf2a3949 100644
--- a/tools/generated_files/350/operatorsScore.csv
+++ b/tools/generated_files/350/operatorsScore.csv
@@ -186,6 +186,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/350/supportedExprs.csv b/tools/generated_files/350/supportedExprs.csv
index 6c34e85c530..1eaf8676add 100644
--- a/tools/generated_files/350/supportedExprs.csv
+++ b/tools/generated_files/350/supportedExprs.csv
@@ -722,6 +722,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/351/operatorsScore.csv b/tools/generated_files/351/operatorsScore.csv
index d98c632ac68..946bf2a3949 100644
--- a/tools/generated_files/351/operatorsScore.csv
+++ b/tools/generated_files/351/operatorsScore.csv
@@ -186,6 +186,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/351/supportedExprs.csv b/tools/generated_files/351/supportedExprs.csv
index 6c34e85c530..1eaf8676add 100644
--- a/tools/generated_files/351/supportedExprs.csv
+++ b/tools/generated_files/351/supportedExprs.csv
@@ -722,6 +722,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,N
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/operatorsScore.csv b/tools/generated_files/operatorsScore.csv
index e3f8d1053c1..2bb696b4d87 100644
--- a/tools/generated_files/operatorsScore.csv
+++ b/tools/generated_files/operatorsScore.csv
@@ -172,6 +172,7 @@ Md5,4
 MicrosToTimestamp,4
 MillisToTimestamp,4
 Min,4
+MinBy,4
 Minute,4
 MonotonicallyIncreasingID,4
 Month,4
diff --git a/tools/generated_files/supportedExprs.csv b/tools/generated_files/supportedExprs.csv
index 5f57725522f..1e590202a05 100644
--- a/tools/generated_files/supportedExprs.csv
+++ b/tools/generated_files/supportedExprs.csv
@@ -687,6 +687,12 @@ Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
 Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
 Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS
+MinBy,S,`min_by`,None,aggregation,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,aggregation,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
+MinBy,S,`min_by`,None,reduction,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS
+MinBy,S,`min_by`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS
 Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
 Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA