From 0718545419d89c060bdbc4fe2bcd4d1cbbf9ac6b Mon Sep 17 00:00:00 2001
From: fejiang
Date: Fri, 27 Sep 2024 16:09:34 +0800
Subject: [PATCH 1/5] override checkScanSchemata

Signed-off-by: fejiang
---
 .../RapidsParquetSchemaPruningSuite.scala     | 29 ++++++++++++++++++-
 .../sql/rapids/utils/RapidsTestSettings.scala |  8 ++---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala
index 689448fb7f0..32fa94a300a 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala
@@ -19,9 +19,36 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.suites

+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruningSuite
+import org.apache.spark.sql.rapids.GpuFileSourceScanExec
 import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait

 class RapidsParquetSchemaPruningSuite
   extends ParquetSchemaPruningSuite
-  with RapidsSQLTestsBaseTrait {}
+  with RapidsSQLTestsBaseTrait {
+
+  override protected def checkScanSchemata(df: DataFrame,
+      expectedSchemaCatalogStrings: String*): Unit = {
+    val fileSourceScanSchemata =
+      collect(df.queryExecution.executedPlan) {
+        case scan: FileSourceScanExec => scan.requiredSchema
+        case gpuScan: GpuFileSourceScanExec => gpuScan.requiredSchema
+      }
+    // Print the full execution plan
+    println("Full Execution Plan:")
+    println(df.queryExecution.executedPlan.treeString)
+    assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size,
+      s"Found ${fileSourceScanSchemata.size} file sources in dataframe, " +
+        s"but expected $expectedSchemaCatalogStrings")
+    fileSourceScanSchemata.zip(expectedSchemaCatalogStrings).foreach {
+      case (scanSchema, expectedScanSchemaCatalogString) =>
+        val expectedScanSchema = CatalystSqlParser.parseDataType(expectedScanSchemaCatalogString)
+        implicit val equality = schemaEquality
+        assert(scanSchema === expectedScanSchema)
+    }
+  }
+
+}
diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
index 8b76e350fef..98bf8f35fd7 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala
@@ -102,10 +102,10 @@ class RapidsTestSettings extends BackendTestSettings {
     .exclude("SPARK-31159: rebasing dates in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404"))
     .exclude("SPARK-35427: datetime rebasing in the EXCEPTION mode", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404"))
   enableSuite[RapidsParquetSchemaPruningSuite]
-    .excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405"))
-    .excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405"))
-    .excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405"))
-
.excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + //.excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) enableSuite[RapidsParquetSchemaSuite] .exclude("schema mismatch failure error message for parquet reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11434")) .exclude("schema mismatch failure error message for parquet vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11446")) From f5d4d632155c085ef3f2ee40988efe2df3ed6b19 Mon Sep 17 00:00:00 2001 From: fejiang Date: Sat, 19 Oct 2024 12:05:20 +0800 Subject: [PATCH 2/5] case added for pruning suite Signed-off-by: fejiang --- .../RapidsParquetSchemaPruningSuite.scala | 131 +++++++++++++++++- .../sql/rapids/utils/RapidsTestSettings.scala | 8 +- 2 files changed, 134 insertions(+), 5 deletions(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala index 32fa94a300a..c2e9f84c5d5 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala @@ -19,12 +19,20 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites -import org.apache.spark.sql.DataFrame +import java.io.File +import scala.reflect.ClassTag +import scala.reflect.runtime.universe._ +import scala.reflect.runtime.universe.TypeTag +import org.scalactic.Equality +import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruningSuite +import org.apache.spark.sql.functions.{col, explode} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.GpuFileSourceScanExec import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait +import org.apache.spark.sql.types.StructType class RapidsParquetSchemaPruningSuite extends ParquetSchemaPruningSuite @@ -49,6 +57,127 @@ class RapidsParquetSchemaPruningSuite implicit val equality = schemaEquality assert(scanSchema === expectedScanSchema) } + println("hello I am here") } + + testSchemaPruning("select a single complex field 2") { + //val query = sql("select name.middle from contacts") + val query = sql("select name.middle, address from contacts where p=2") + query.show() + //checkScan2(query, "struct>") + //checkAnswer(query.orderBy("id"), Row("X.") :: Row("Y.") :: Row(null) :: Row(null) :: Nil) + } + + protected val schemaEquality2 = new Equality[StructType] { + override def areEqual(a: StructType, b: Any): Boolean = + b match { + case otherType: StructType => a.sameType(otherType) + case _ => false + } + } + + protected def checkScan2(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { + //checkScanSchemata(df, expectedSchemaCatalogStrings: _*) + // We check here that we can 
execute the query without throwing an exception. The results + // themselves are irrelevant, and should be checked elsewhere as needed + //df.collect() + } + + protected def makeDataSourceFile2[T <: Product : ClassTag : TypeTag] + (data: Seq[T], path: File): Unit = { + spark.createDataFrame(data).write.mode(SaveMode.Overwrite).format(dataSourceName) + .save(path.getCanonicalPath) + } + + + test("makeDataSourceFile"){ + + val path = "/home/fejiang/Desktop" + + val briefContacts = + BriefContact(2, Name("Janet", "Jones"), "567 Maple Drive") :: + BriefContact(3, Name("Jim", "Jones"), "6242 Ash Street") :: Nil + + makeDataSourceFile2(contacts, new File(path + "/contacts/p=1")) + makeDataSourceFile2(briefContacts, new File(path + "/contacts/p=2")) + makeDataSourceFile2(departments, new File(path + "/departments")) + + val schema = "`id` INT,`name` STRUCT<`first`: STRING, `middle`: STRING, `last`: STRING>, " + + "`address` STRING,`pets` INT,`friends` ARRAY>,`relatives` MAP>,`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, " + + "`address`: STRING>>,`relations` MAP,STRING>,`p` INT" + + spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") + .createOrReplaceTempView("contacts") + + val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + + "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" + spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") + .createOrReplaceTempView("departments") + +// val query = sql("select name.middle from contacts") +// +// query.show() + + val configs = Seq((false, true)) + + configs.foreach { case (nestedPruning, nestedPruningOnExpr) => + withSQLConf( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, + SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { + val query1 = spark.table("contacts") + .select(explode(col("friends.first"))) + + query1.collect() +// checkAnswer(query1, Row("Susan") :: Nil) + + } + } + + } + + def printType[T: TypeTag](value: T): Unit = { + val tpe = typeOf[T] + println(s"The type of the value is: $tpe") + } + + test("test TypeTag"){ + printType(42) + printType("Hello") + printType(List(1, 2, 3)) + + val contactsTypeTag: TypeTag[Seq[Contact]] = typeTag[Seq[Contact]] + + // Print the type + println(s"TypeTag for contacts: ${contactsTypeTag.tpe}") + + } + + + testSchemaPruning("select explode of nested field of array of struct2") { + // Config combinations + val configs = Seq( (false, true)) + + configs.foreach { case (nestedPruning, nestedPruningOnExpr) => + withSQLConf( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, + SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { + val query1 = spark.table("contacts") + .select(explode(col("friends.first"))) + query1.collect() + + } + } + } + + + testSchemaPruning("empty schema intersection2") { + val query = sql("select name.middle from contacts where p=2") + checkScan(query, "struct>") + checkAnswer(query.orderBy("id"), + Row(null) :: Row(null) :: Nil) + } } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 98bf8f35fd7..8b76e350fef 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -102,10 +102,10 @@ 
class RapidsTestSettings extends BackendTestSettings { .exclude("SPARK-31159: rebasing dates in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) .exclude("SPARK-35427: datetime rebasing in the EXCEPTION mode", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) enableSuite[RapidsParquetSchemaPruningSuite] - //.excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - //.excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - //.excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - //.excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Spark vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) enableSuite[RapidsParquetSchemaSuite] .exclude("schema mismatch failure error message for parquet reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11434")) .exclude("schema mismatch failure error message for parquet vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11446")) From 42b2be50d2accf0e202360dc3e18d6ae49c93fde Mon Sep 17 00:00:00 2001 From: fejiang Date: Thu, 24 Oct 2024 10:58:33 +0800 Subject: [PATCH 3/5] exclude by suffix added Signed-off-by: fejiang --- .../RapidsParquetSchemaPruningSuite.scala | 134 +----------------- .../rapids/utils/BackendTestSettings.scala | 23 +++ .../sql/rapids/utils/RapidsTestSettings.scala | 11 +- 3 files changed, 31 insertions(+), 137 deletions(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala index c2e9f84c5d5..76b5563de4f 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsParquetSchemaPruningSuite.scala @@ -19,20 +19,12 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites -import java.io.File -import scala.reflect.ClassTag -import scala.reflect.runtime.universe._ -import scala.reflect.runtime.universe.TypeTag -import org.scalactic.Equality -import org.apache.spark.sql.{DataFrame, Row, SaveMode} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruningSuite -import org.apache.spark.sql.functions.{col, explode} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.GpuFileSourceScanExec import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait -import org.apache.spark.sql.types.StructType class RapidsParquetSchemaPruningSuite extends ParquetSchemaPruningSuite @@ -45,9 +37,6 @@ class RapidsParquetSchemaPruningSuite case scan: FileSourceScanExec => scan.requiredSchema case gpuScan: GpuFileSourceScanExec => 
gpuScan.requiredSchema } - // Print the full execution plan - println("Full Execution Plan:") - println(df.queryExecution.executedPlan.treeString) assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size, s"Found ${fileSourceScanSchemata.size} file sources in dataframe, " + s"but expected $expectedSchemaCatalogStrings") @@ -57,127 +46,6 @@ class RapidsParquetSchemaPruningSuite implicit val equality = schemaEquality assert(scanSchema === expectedScanSchema) } - println("hello I am here") - } - - - testSchemaPruning("select a single complex field 2") { - //val query = sql("select name.middle from contacts") - val query = sql("select name.middle, address from contacts where p=2") - query.show() - //checkScan2(query, "struct>") - //checkAnswer(query.orderBy("id"), Row("X.") :: Row("Y.") :: Row(null) :: Row(null) :: Nil) - } - - protected val schemaEquality2 = new Equality[StructType] { - override def areEqual(a: StructType, b: Any): Boolean = - b match { - case otherType: StructType => a.sameType(otherType) - case _ => false - } - } - - protected def checkScan2(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { - //checkScanSchemata(df, expectedSchemaCatalogStrings: _*) - // We check here that we can execute the query without throwing an exception. The results - // themselves are irrelevant, and should be checked elsewhere as needed - //df.collect() - } - - protected def makeDataSourceFile2[T <: Product : ClassTag : TypeTag] - (data: Seq[T], path: File): Unit = { - spark.createDataFrame(data).write.mode(SaveMode.Overwrite).format(dataSourceName) - .save(path.getCanonicalPath) - } - - - test("makeDataSourceFile"){ - - val path = "/home/fejiang/Desktop" - - val briefContacts = - BriefContact(2, Name("Janet", "Jones"), "567 Maple Drive") :: - BriefContact(3, Name("Jim", "Jones"), "6242 Ash Street") :: Nil - - makeDataSourceFile2(contacts, new File(path + "/contacts/p=1")) - makeDataSourceFile2(briefContacts, new File(path + "/contacts/p=2")) - makeDataSourceFile2(departments, new File(path + "/departments")) - - val schema = "`id` INT,`name` STRUCT<`first`: STRING, `middle`: STRING, `last`: STRING>, " + - "`address` STRING,`pets` INT,`friends` ARRAY>,`relatives` MAP>,`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, " + - "`address`: STRING>>,`relations` MAP,STRING>,`p` INT" - - spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") - .createOrReplaceTempView("contacts") - - val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + - "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" - spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") - .createOrReplaceTempView("departments") - -// val query = sql("select name.middle from contacts") -// -// query.show() - - val configs = Seq((false, true)) - - configs.foreach { case (nestedPruning, nestedPruningOnExpr) => - withSQLConf( - SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, - SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { - val query1 = spark.table("contacts") - .select(explode(col("friends.first"))) - - query1.collect() -// checkAnswer(query1, Row("Susan") :: Nil) - - } - } - - } - - def printType[T: TypeTag](value: T): Unit = { - val tpe = typeOf[T] - println(s"The type of the value is: $tpe") - } - - test("test TypeTag"){ - printType(42) - printType("Hello") - printType(List(1, 2, 3)) - - val contactsTypeTag: TypeTag[Seq[Contact]] = 
typeTag[Seq[Contact]] - - // Print the type - println(s"TypeTag for contacts: ${contactsTypeTag.tpe}") - - } - - - testSchemaPruning("select explode of nested field of array of struct2") { - // Config combinations - val configs = Seq( (false, true)) - - configs.foreach { case (nestedPruning, nestedPruningOnExpr) => - withSQLConf( - SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedPruning.toString, - SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> nestedPruningOnExpr.toString) { - val query1 = spark.table("contacts") - .select(explode(col("friends.first"))) - query1.collect() - - } - } - } - - testSchemaPruning("empty schema intersection2") { - val query = sql("select name.middle from contacts where p=2") - checkScan(query, "struct>") - checkAnswer(query.orderBy("id"), - Row(null) :: Row(null) :: Nil) } } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala index a57b7802c9d..c9bcd7da99f 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala @@ -141,6 +141,12 @@ abstract class BackendTestSettings { this } + def excludeBySuffix(suffixes: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeBySuffix(suffixes)) + excludeReasons.add(reason) + this + } + def includeRapidsTestsByPrefix(prefixes: String*): SuiteSettings = { inclusion.add(IncludeRapidsTestByPrefix(prefixes: _*)) this @@ -152,6 +158,13 @@ abstract class BackendTestSettings { this } + def excludeRapidsTestsBySuffix(suffixes: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeRadpisTestByPrefix(suffixes)) + excludeReasons.add(reason) + this + } + + def includeAllRapidsTests(): SuiteSettings = { inclusion.add(IncludeByPrefix(RAPIDS_TEST)) this @@ -159,6 +172,7 @@ abstract class BackendTestSettings { def excludeAllRapidsTests(reason: ExcludeReason): SuiteSettings = { exclusion.add(ExcludeByPrefix(RAPIDS_TEST)) + exclusion.add(ExcludeBySuffix(RAPIDS_TEST)) excludeReasons.add(reason) this } @@ -210,6 +224,15 @@ abstract class BackendTestSettings { } } + private case class ExcludeBySuffix(suffixes: String*) extends ExcludeBase { + override def isExcluded(testName: String): Boolean = { + if (suffixes.exists(suffix => testName.endsWith(suffix))) { + return true + } + false + } + } + private case class IncludeRapidsTestByPrefix(prefixes: String*) extends IncludeBase { override def isIncluded(testName: String): Boolean = { if (prefixes.exists(prefix => testName.startsWith(RAPIDS_TEST + prefix))) { diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 8b76e350fef..7b4f7b47a0e 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -102,10 +102,13 @@ class RapidsTestSettings extends BackendTestSettings { .exclude("SPARK-31159: rebasing dates in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) .exclude("SPARK-35427: datetime rebasing in the EXCEPTION mode", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11404")) enableSuite[RapidsParquetSchemaPruningSuite] - .excludeByPrefix("Spark vectorized reader", 
KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - .excludeByPrefix("Non-vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - .excludeByPrefix("Case-insensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) - .excludeByPrefix("Case-sensitive parser", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeBySuffix("select a single complex field", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11619")) + .excludeBySuffix("select a single complex field and the partition column", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11620")) + .excludeBySuffix("select missing subfield", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11621")) + .excludeBySuffix("select explode of nested field of array of struct", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeBySuffix("empty schema intersection", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11627")) + .excludeBySuffix("select one deep nested complex field after join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11628")) + .excludeBySuffix("select one deep nested complex field after outer join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11629")) enableSuite[RapidsParquetSchemaSuite] .exclude("schema mismatch failure error message for parquet reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11434")) .exclude("schema mismatch failure error message for parquet vectorized reader", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11446")) From b90a4dfcb1519f1f06e2cbe1e8a4df0866f2d24a Mon Sep 17 00:00:00 2001 From: fejiang Date: Thu, 24 Oct 2024 15:46:00 +0800 Subject: [PATCH 4/5] issue number assigned Signed-off-by: fejiang --- .../org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 7b4f7b47a0e..d6d15e81e80 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -105,7 +105,7 @@ class RapidsTestSettings extends BackendTestSettings { .excludeBySuffix("select a single complex field", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11619")) .excludeBySuffix("select a single complex field and the partition column", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11620")) .excludeBySuffix("select missing subfield", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11621")) - .excludeBySuffix("select explode of nested field of array of struct", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11405")) + .excludeBySuffix("select explode of nested field of array of struct", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11653")) .excludeBySuffix("empty schema intersection", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11627")) .excludeBySuffix("select one deep nested complex field after join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11628")) .excludeBySuffix("select one deep nested complex field after outer join", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11629")) From 80275b3a9adfbcde1385c1c4cbf05f9d645d81b4 Mon Sep 17 
00:00:00 2001
From: fejiang
Date: Thu, 24 Oct 2024 15:57:58 +0800
Subject: [PATCH 5/5] nit

Signed-off-by: fejiang
---
 .../org/apache/spark/sql/rapids/utils/BackendTestSettings.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala
index c9bcd7da99f..6e35d568aca 100644
--- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala
+++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala
@@ -164,7 +164,6 @@ abstract class BackendTestSettings {
     this
   }
-
   def includeAllRapidsTests(): SuiteSettings = {
     inclusion.add(IncludeByPrefix(RAPIDS_TEST))
     this
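
A minimal, self-contained Scala sketch (not part of the patches above) of the suffix-based exclusion idea that PATCH 3/5 introduces as BackendTestSettings.excludeBySuffix. The object and method names below are illustrative only, not the spark-rapids API; the test names are examples in the style of ParquetSchemaPruningSuite, where each reader mode prepends a prefix to a shared base test name.

// Sketch: suffix matching targets one base test name across every reader mode,
// unlike prefix matching, which excludes an entire reader mode at once.
object SuffixExclusionSketch {
  // A test is excluded when its name ends with any excluded base name.
  def isExcluded(testName: String, excludedSuffixes: Seq[String]): Boolean =
    excludedSuffixes.exists(suffix => testName.endsWith(suffix))

  def main(args: Array[String]): Unit = {
    val excluded = Seq("select a single complex field", "empty schema intersection")
    val tests = Seq(
      "Spark vectorized reader - select a single complex field",
      "Non-vectorized reader - select a single complex field",
      "Spark vectorized reader - select a single complex field array and its parent struct array")
    tests.foreach(t => println(s"$t -> excluded = ${isExcluded(t, excluded)}"))
    // Only the first two names end with an excluded suffix; the third keeps running,
    // whereas excludeByPrefix("Spark vectorized reader", ...) would have dropped
    // every test in that reader mode.
  }
}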