From 0718545419d89c060bdbc4fe2bcd4d1cbbf9ac6b Mon Sep 17 00:00:00 2001
From: fejiang
Date: Fri, 27 Sep 2024 16:09:34 +0800
Subject: [PATCH 1/5] override checkScanSchemata

Signed-off-by: fejiang
---
 .../RapidsParquetSchemaPruningSuite.scala     | 29 ++++++++++++++++++-
 .../sql/rapids/utils/RapidsTestSettings.scala |  8 ++---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala
index 689448fb7f0..32fa94a300a 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala
@@ -19,9 +19,36 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.suites

+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruningSuite
+import org.apache.spark.sql.rapids.GpuFileSourceScanExec
 import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait

 class RapidsParquetSchemaPruningSuite
   extends ParquetSchemaPruningSuite
-  with RapidsSQLTestsBaseTrait {}
+  with RapidsSQLTestsBaseTrait {
+
+  override protected def checkScanSchemata(df: DataFrame,
+      expectedSchemaCatalogStrings: String*): Unit = {
+    val fileSourceScanSchemata =
+      collect(df.queryExecution.executedPlan) {
+        case scan: FileSourceScanExec => scan.requiredSchema
+        case gpuScan: GpuFileSourceScanExec => gpuScan.requiredSchema
+      }
+    // Print the full execution plan
+    println("Full Execution Plan:")
+    println(df.queryExecution.executedPlan.treeString)
+    assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size,
+      s"Found ${fileSourceScanSchemata.size} file sources in dataframe, " +
+        s"but expected $expectedSchemaCatalogStrings")
+    fileSourceScanSchemata.zip(expectedSchemaCatalogStrings).foreach {
+      case (scanSchema, expectedScanSchemaCatalogString) =>
+        val expectedScanSchema = CatalystSqlParser.parseDataType(expectedScanSchemaCatalogString)
+        implicit val equality = schemaEquality
+        assert(scanSchema === expectedScanSchema)
+    }
+  }
+
+}
diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
index 8b76e350fef..98bf8f35fd7 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
@@ -102,10 +102,10 @@ class RapidsTestSettings extends BackendTestSettings {
     .exclude("SPARK-31159: rebasing dates in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404"))
     .exclude("SPARK-35427: datetime rebasing in the EXCEPTION mode", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404"))
   enableSuite[RapidsParquetSchemaPruningSuite]
-    .excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405"))
-    .excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405"))
-    .excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405"))
-
.excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) enableSuite[RapidsParquetSchemaSuite] .exclude("schema mismatch failure error message for parquet reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11434")) .exclude("schema mismatch failure error message for parquet vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11446")) From f5d4d632155c085ef3f2ee40988efe2df3ed6b19 Mon Sep 17 00:00:00 2001 From: fejiang Date: Sat, 19 Oct 2024 12:05:20 +0800 Subject: [PATCH 2/5] case added for pruning suite Signed-off-by: fejiang --- .../RapidsParquetSchemaPruningSuite.scala | 131 +++++++++++++++++- .../sql/rapids/utils/RapidsTestSettings.scala | 8 +- 2 files changed, 134 insertions(+), 5 deletions(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala index 32fa94a300a..c2e9f84c5d5 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala @@ -19,12 +19,20 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites -import org.apache.spark.sql.DataFrame +import java.io.File +import scala.reflect.ClassTag +import scala.reflect.runtime.universe._ +import scala.reflect.runtime.universe.TypeTag +import org.scalactic.Equality +import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruningSuite +import org.apache.spark.sql.functions.{col, explode} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.GpuFileSourceScanExec import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait +import org.apache.spark.sql.types.StructType class RapidsParquetSchemaPruningSuite extends ParquetSchemaPruningSuite @@ -49,6 +57,127 @@ class RapidsParquetSchemaPruningSuite implicit val equality = schemaEquality assert(scanSchema === expectedScanSchema) } + println("hello I am here") } + + testSchemaPruning("select a single complex field 2") { + //val query = sql("select name.middle from contacts") + val query = sql("select name.middle, address from contacts where p=2") + query.show() + //checkScan2(query, "struct>") + //checkAnswer(query.orderBy("id"), Row("X.") :: Row("Y.") :: Row(null) :: Row(null) :: Nil) + } + + protected val schemaEquality2 = new Equality[StructType] { + override def areEqual(a: StructType, b: Any): Boolean = + b match { + case otherType: StructType => a.sameType(otherType) + case _ => false + } + } + + protected def checkScan2(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { + //checkScanSchemata(df, expectedSchemaCatalogStrings: _*) + // We check here that we can 
execute the query without throwing an exception. The results + // themselves are irrelevant, and should be checked elsewhere as needed + //df.collect() + } + + protected def makeDataSourceFile2[T <: Product : ClassTag : TypeTag] + (data: Seq[T], path: File): Unit = { + spark.createDataFrame(data).write.mode(SaveMode.Overwrite).format(dataSourceName) + .save(path.getCanonicalPath) + } + + + test("makeDataSourceFile"){ + + val path = "/home/fejiang/Desktop" + + val briefContacts = + BriefContact(2, Name("Janet", "Jones"), "567 Maple Drive") :: + BriefContact(3, Name("Jim", "Jones"), "6242 Ash Street") :: Nil + + makeDataSourceFile2(contacts, new File(path + "/contacts/p=1")) + makeDataSourceFile2(briefContacts, new File(path + "/contacts/p=2")) + makeDataSourceFile2(departments, new File(path + "/departments")) + + val schema = "`id` INT,`name` STRUCT<`first`: STRING, `middle`: STRING, `last`: STRING>, " + + "`address` STRING,`pets` INT,`friends` ARRAY>,`relatives` MAP>,`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, " + + "`address`: STRING>>,`relations` MAP,STRING>,`p` INT" + + spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") + .createOrReplaceTempView("contacts") + + val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + + "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" + spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") + .createOrReplaceTempView("departments") + +// val query = sql("select name.middle from contacts") +// +// query.show() + + val configs = Seq((false, true)) + + configs.foreach { case (nestedPruning, nestedPruningOnExpr) => + withSQLConf( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, + SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { + val query1 = spark.table("contacts") + .select(explode(col("friends.first"))) + + query1.collect() +// checkAnswer(query1, Row("Susan") :: Nil) + + } + } + + } + + def printType[T: TypeTag](value: T): Unit = { + val tpe = typeOf[T] + println(s"The type of the value is: $tpe") + } + + test("test TypeTag"){ + printType(42) + printType("Hello") + printType(List(1, 2, 3)) + + val contactsTypeTag: TypeTag[Seq[Contact]] = typeTag[Seq[Contact]] + + // Print the type + println(s"TypeTag for contacts: ${contactsTypeTag.tpe}") + + } + + + testSchemaPruning("select explode of nested field of array of struct2") { + // Config combinations + val configs = Seq( (false, true)) + + configs.foreach { case (nestedPruning, nestedPruningOnExpr) => + withSQLConf( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, + SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { + val query1 = spark.table("contacts") + .select(explode(col("friends.first"))) + query1.collect() + + } + } + } + + + testSchemaPruning("empty schema intersection2") { + val query = sql("select name.middle from contacts where p=2") + checkScan(query, "struct>") + checkAnswer(query.orderBy("id"), + Row(null) :: Row(null) :: Nil) + } } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 98bf8f35fd7..8b76e350fef 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -102,10 +102,10 @@ 
class RapidsTestSettings extends BackendTestSettings { .exclude("SPARK-31159: rebasing dates in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) .exclude("SPARK-35427: datetime rebasing in the EXCEPTION mode", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) enableSuite[RapidsParquetSchemaPruningSuite] - //.excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - //.excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - //.excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - //.excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) enableSuite[RapidsParquetSchemaSuite] .exclude("schema mismatch failure error message for parquet reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11434")) .exclude("schema mismatch failure error message for parquet vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11446")) From 42b2be50d2accf0e202360dc3e18d6ae49c93fde Mon Sep 17 00:00:00 2001 From: fejiang Date: Thu, 24 Oct 2024 10:58:33 +0800 Subject: [PATCH 3/5] exclude by suffix added Signed-off-by: fejiang --- .../RapidsParquetSchemaPruningSuite.scala | 134 +----------------- .../rapids/utils/BackendTestSettings.scala | 23 +++ .../sql/rapids/utils/RapidsTestSettings.scala | 11 +- 3 files changed, 31 insertions(+), 137 deletions(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala index c2e9f84c5d5..76b5563de4f 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala @@ -19,20 +19,12 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites -import java.io.File -import scala.reflect.ClassTag -import scala.reflect.runtime.universe._ -import scala.reflect.runtime.universe.TypeTag -import org.scalactic.Equality -import org.apache.spark.sql.{DataFrame, Row, SaveMode} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruningSuite -import org.apache.spark.sql.functions.{col, explode} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.GpuFileSourceScanExec import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait -import org.apache.spark.sql.types.StructType class RapidsParquetSchemaPruningSuite extends ParquetSchemaPruningSuite @@ -45,9 +37,6 @@ class RapidsParquetSchemaPruningSuite case scan: FileSourceScanExec => scan.requiredSchema case gpuScan: GpuFileSourceScanExec => 
gpuScan.requiredSchema } - // Print the full execution plan - println("Full Execution Plan:") - println(df.queryExecution.executedPlan.treeString) assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size, s"Found ${fileSourceScanSchemata.size} file sources in dataframe, " + s"but expected $expectedSchemaCatalogStrings") @@ -57,127 +46,6 @@ class RapidsParquetSchemaPruningSuite implicit val equality = schemaEquality assert(scanSchema === expectedScanSchema) } - println("hello I am here") - } - - - testSchemaPruning("select a single complex field 2") { - //val query = sql("select name.middle from contacts") - val query = sql("select name.middle, address from contacts where p=2") - query.show() - //checkScan2(query, "struct>") - //checkAnswer(query.orderBy("id"), Row("X.") :: Row("Y.") :: Row(null) :: Row(null) :: Nil) - } - - protected val schemaEquality2 = new Equality[StructType] { - override def areEqual(a: StructType, b: Any): Boolean = - b match { - case otherType: StructType => a.sameType(otherType) - case _ => false - } - } - - protected def checkScan2(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { - //checkScanSchemata(df, expectedSchemaCatalogStrings: _*) - // We check here that we can execute the query without throwing an exception. The results - // themselves are irrelevant, and should be checked elsewhere as needed - //df.collect() - } - - protected def makeDataSourceFile2[T <: Product : ClassTag : TypeTag] - (data: Seq[T], path: File): Unit = { - spark.createDataFrame(data).write.mode(SaveMode.Overwrite).format(dataSourceName) - .save(path.getCanonicalPath) - } - - - test("makeDataSourceFile"){ - - val path = "/home/fejiang/Desktop" - - val briefContacts = - BriefContact(2, Name("Janet", "Jones"), "567 Maple Drive") :: - BriefContact(3, Name("Jim", "Jones"), "6242 Ash Street") :: Nil - - makeDataSourceFile2(contacts, new File(path + "/contacts/p=1")) - makeDataSourceFile2(briefContacts, new File(path + "/contacts/p=2")) - makeDataSourceFile2(departments, new File(path + "/departments")) - - val schema = "`id` INT,`name` STRUCT<`first`: STRING, `middle`: STRING, `last`: STRING>, " + - "`address` STRING,`pets` INT,`friends` ARRAY>,`relatives` MAP>,`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, " + - "`address`: STRING>>,`relations` MAP,STRING>,`p` INT" - - spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") - .createOrReplaceTempView("contacts") - - val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + - "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" - spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") - .createOrReplaceTempView("departments") - -// val query = sql("select name.middle from contacts") -// -// query.show() - - val configs = Seq((false, true)) - - configs.foreach { case (nestedPruning, nestedPruningOnExpr) => - withSQLConf( - SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, - SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { - val query1 = spark.table("contacts") - .select(explode(col("friends.first"))) - - query1.collect() -// checkAnswer(query1, Row("Susan") :: Nil) - - } - } - - } - - def printType[T: TypeTag](value: T): Unit = { - val tpe = typeOf[T] - println(s"The type of the value is: $tpe") - } - - test("test TypeTag"){ - printType(42) - printType("Hello") - printType(List(1, 2, 3)) - - val contactsTypeTag: TypeTag[Seq[Contact]] = 
typeTag[Seq[Contact]] - - // Print the type - println(s"TypeTag for contacts: ${contactsTypeTag.tpe}") - - } - - - testSchemaPruning("select explode of nested field of array of struct2") { - // Config combinations - val configs = Seq( (false, true)) - - configs.foreach { case (nestedPruning, nestedPruningOnExpr) => - withSQLConf( - SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, - SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { - val query1 = spark.table("contacts") - .select(explode(col("friends.first"))) - query1.collect() - - } - } - } - - testSchemaPruning("empty schema intersection2") { - val query = sql("select name.middle from contacts where p=2") - checkScan(query, "struct>") - checkAnswer(query.orderBy("id"), - Row(null) :: Row(null) :: Nil) } } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala index a57b7802c9d..c9bcd7da99f 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala @@ -141,6 +141,12 @@ abstract class BackendTestSettings { this } + def excludeBySuffix(suffixes: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeBySuffix(suffixes)) + excludeReasons.add(reason) + this + } + def includeRapidsTestsByPrefix(prefixes: String*): SuiteSettings = { inclusion.add(IncludeRapidsTestByPrefix(prefixes: _*)) this @@ -152,6 +158,13 @@ abstract class BackendTestSettings { this } + def excludeRapidsTestsBySuffix(suffixes: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeRadpisTestByPrefix(suffixes)) + excludeReasons.add(reason) + this + } + + def includeAllRapidsTests(): SuiteSettings = { inclusion.add(IncludeByPrefix(RAPIDS_TEST)) this @@ -159,6 +172,7 @@ abstract class BackendTestSettings { def excludeAllRapidsTests(reason: ExcludeReason): SuiteSettings = { exclusion.add(ExcludeByPrefix(RAPIDS_TEST)) + exclusion.add(ExcludeBySuffix(RAPIDS_TEST)) excludeReasons.add(reason) this } @@ -210,6 +224,15 @@ abstract class BackendTestSettings { } } + private case class ExcludeBySuffix(suffixes: String*) extends ExcludeBase { + override def isExcluded(testName: String): Boolean = { + if (suffixes.exists(suffix => testName.endsWith(suffix))) { + return true + } + false + } + } + private case class IncludeRapidsTestByPrefix(prefixes: String*) extends IncludeBase { override def isIncluded(testName: String): Boolean = { if (prefixes.exists(prefix => testName.startsWith(RAPIDS_TEST + prefix))) { diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 8b76e350fef..7b4f7b47a0e 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -102,10 +102,13 @@ class RapidsTestSettings extends BackendTestSettings { .exclude("SPARK-31159: rebasing dates in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) .exclude("SPARK-35427: datetime rebasing in the EXCEPTION mode", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) enableSuite[RapidsParquetSchemaPruningSuite] - .excludeByPrefix("Spark vectorized reader", 
KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - .excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - .excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - .excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeBySuffix("select a single complex field", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11619")) + .excludeBySuffix("select a single complex field and the partition column", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11620")) + .excludeBySuffix("select missing subfield", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11621")) + .excludeBySuffix("select explode of nested field of array of struct", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeBySuffix("empty schema intersection", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11627")) + .excludeBySuffix("select one deep nested complex field after join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11628")) + .excludeBySuffix("select one deep nested complex field after outer join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11629")) enableSuite[RapidsParquetSchemaSuite] .exclude("schema mismatch failure error message for parquet reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11434")) .exclude("schema mismatch failure error message for parquet vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11446")) From b90a4dfcb1519f1f06e2cbe1e8a4df0866f2d24a Mon Sep 17 00:00:00 2001 From: fejiang Date: Thu, 24 Oct 2024 15:46:00 +0800 Subject: [PATCH 4/5] issue number assigned Signed-off-by: fejiang --- .../org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 7b4f7b47a0e..d6d15e81e80 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -105,7 +105,7 @@ class RapidsTestSettings extends BackendTestSettings { .excludeBySuffix("select a single complex field", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11619")) .excludeBySuffix("select a single complex field and the partition column", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11620")) .excludeBySuffix("select missing subfield", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11621")) - .excludeBySuffix("select explode of nested field of array of struct", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeBySuffix("select explode of nested field of array of struct", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11653")) .excludeBySuffix("empty schema intersection", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11627")) .excludeBySuffix("select one deep nested complex field after join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11628")) .excludeBySuffix("select one deep nested complex field after outer join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11629")) From 80275b3a9adfbcde1385c1c4cbf05f9d645d81b4 Mon Sep 17 
00:00:00 2001
From: fejiang
Date: Thu, 24 Oct 2024 15:57:58 +0800
Subject: [PATCH 5/5] nit

Signed-off-by: fejiang
---
 .../org/apache/spark/sql/rapids/utils/BackendTestSettings.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala
index c9bcd7da99f..6e35d568aca 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala
@@ -164,7 +164,6 @@ abstract class BackendTestSettings {
     this
   }
-
   def includeAllRapidsTests(): SuiteSettings = {
     inclusion.add(IncludeByPrefix(RAPIDS_TEST))
     this
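
A minimal, self-contained Scala sketch (not part of the patches above) of the suffix-based exclusion idea that PATCH 3/5 introduces as BackendTestSettings.excludeBySuffix. The object and method names below are illustrative only, not the spark-rapids API; the test names are examples in the style of ParquetSchemaPruningSuite, where each reader mode prepends a prefix to a shared base test name.

// Sketch: suffix matching targets one base test name across every reader mode,
// unlike prefix matching, which excludes an entire reader mode at once.
object SuffixExclusionSketch {
  // A test is excluded when its name ends with any excluded base name.
  def isExcluded(testName: String, excludedSuffixes: Seq[String]): Boolean =
    excludedSuffixes.exists(suffix => testName.endsWith(suffix))

  def main(args: Array[String]): Unit = {
    val excluded = Seq("select a single complex field", "empty schema intersection")
    val tests = Seq(
      "Spark vectorized reader - select a single complex field",
      "Non-vectorized reader - select a single complex field",
      "Spark vectorized reader - select a single complex field array and its parent struct array")
    tests.foreach(t => println(s"$t -> excluded = ${isExcluded(t, excluded)}"))
    // Only the first two names end with an excluded suffix; the third keeps running,
    // whereas excludeByPrefix("Spark vectorized reader", ...) would have dropped
    // every test in that reader mode.
  }
}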