[KYUUBI apache#5414][KSHC] Reader should not pollute the global hiveConf instance

### _Why are the changes needed?_

This PR fixes apache#5414.

`HiveReader` initialization incorrectly uses the global hadoopConf as its hiveConf, so the reader mutates (pollutes) the shared hadoopConf and causes subsequent job reads to fail.
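
For illustration, a minimal sketch of the defensive-copy pattern the fix relies on, using only Hadoop's `Configuration` API; the property names here are hypothetical placeholders, not the actual keys the reader writes:

```scala
import org.apache.hadoop.conf.Configuration

// Stand-in for the catalog's shared hadoopConf (hypothetical keys, for illustration only).
val globalConf = new Configuration(false) // `false`: skip loading default resources
globalConf.set("fs.defaultFS", "hdfs://example:8020")

// Before the fix: the reader received globalConf itself, so any setting it wrote,
// e.g. globalConf.set("reader.specific.key", "..."), leaked to every other consumer.

// After the fix: Configuration's copy constructor gives each reader an isolated view.
val readerConf = new Configuration(globalConf)
readerConf.set("reader.specific.key", "value")

assert(readerConf.get("fs.defaultFS") == "hdfs://example:8020") // copy inherits existing entries
assert(globalConf.get("reader.specific.key") == null)           // shared conf stays untouched
```

The one-line change in `HiveScan.createReaderFactory` below applies exactly this pattern.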

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly, including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request

### _Was this patch authored or co-authored using generative AI tooling?_

No

Closes apache#5424 from Yikf/orc-read.

Closes apache#5414

d6bdf7b [yikaifei] [KYUUBI apache#5414] Reader should not pollute the global hiveConf instance

Authored-by: yikaifei <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
yikf authored and pan3793 committed Oct 17, 2023
1 parent dcaacc3 commit 47555eb
Showing 2 changed files with 18 additions and 1 deletion.
HiveScan.scala
@@ -64,7 +64,7 @@ case class HiveScan(
   }

   override def createReaderFactory(): PartitionReaderFactory = {
-    val hiveConf = fileIndex.hiveCatalog.hadoopConfiguration()
+    val hiveConf = new Configuration(fileIndex.hiveCatalog.hadoopConfiguration())
     addCatalogTableConfToConf(hiveConf, catalogTable)

     val table = HiveClientImpl.toHiveTable(catalogTable)
HiveQuerySuite.scala
@@ -175,6 +175,23 @@ class HiveQuerySuite extends KyuubiHiveTest {
     }
   }

+  test("[KYUUBI #5414] Reader should not pollute the global hiveConf instance") {
+    withSparkSession() { spark =>
+      val table = "hive.default.hiveconf_test"
+      withTempPartitionedTable(spark, table, "ORC", hiveTable = true) {
+        spark.sql(
+          s"""
+             | INSERT OVERWRITE
+             | $table PARTITION(year = '2022')
+             | VALUES("yi", "08")
+             |""".stripMargin).collect()
+
+        checkQueryResult(s"select * from $table", spark, Array(Row.apply("yi", "2022", "08")))
+        checkQueryResult(s"select count(*) as c from $table", spark, Array(Row.apply(1)))
+      }
+    }
+  }
+
   test("Partitioned table insert and static partition value is empty string") {
     withSparkSession() { spark =>
       val table = "hive.default.employee"
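
Reading the test: it inserts a single row into an ORC-backed partitioned Hive table and then issues two consecutive reads against it. Presumably the second query is the regression check; before the fix, the first scan could leak reader-specific settings into the shared hadoopConf and break a subsequent read, whereas with the per-scan `Configuration` copy both queries see a clean configuration.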
