diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index b2fcbdf8eca7d..bd311b3a98047 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -54,11 +54,6 @@ commons-codec provided - - commons-lang - commons-lang - provided - com.google.protobuf protobuf-java diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 577ec21530837..0e93526fce721 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -54,11 +54,6 @@ jackson-databind provided - - commons-lang - commons-lang - provided - org.glassfish.jersey.core jersey-client diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 5d933e34e40ba..975f249558bc4 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -46,7 +46,6 @@ commons-compress/1.26.1//commons-compress-1.26.1.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-io/2.16.1//commons-io-2.16.1.jar -commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.14.0//commons-lang3-3.14.0.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar @@ -81,19 +80,19 @@ hadoop-cloud-storage/3.4.0//hadoop-cloud-storage-3.4.0.jar hadoop-huaweicloud/3.4.0//hadoop-huaweicloud-3.4.0.jar hadoop-shaded-guava/1.2.0//hadoop-shaded-guava-1.2.0.jar hadoop-yarn-server-web-proxy/3.4.0//hadoop-yarn-server-web-proxy-3.4.0.jar -hive-beeline/2.3.9//hive-beeline-2.3.9.jar -hive-cli/2.3.9//hive-cli-2.3.9.jar -hive-common/2.3.9//hive-common-2.3.9.jar -hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar -hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar -hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar -hive-metastore/2.3.9//hive-metastore-2.3.9.jar -hive-serde/2.3.9//hive-serde-2.3.9.jar +hive-beeline/2.3.10//hive-beeline-2.3.10.jar +hive-cli/2.3.10//hive-cli-2.3.10.jar +hive-common/2.3.10//hive-common-2.3.10.jar +hive-exec/2.3.10/core/hive-exec-2.3.10-core.jar +hive-jdbc/2.3.10//hive-jdbc-2.3.10.jar +hive-llap-common/2.3.10//hive-llap-common-2.3.10.jar +hive-metastore/2.3.10//hive-metastore-2.3.10.jar +hive-serde/2.3.10//hive-serde-2.3.10.jar hive-service-rpc/4.0.0//hive-service-rpc-4.0.0.jar -hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar -hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar -hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar -hive-shims/2.3.9//hive-shims-2.3.9.jar +hive-shims-0.23/2.3.10//hive-shims-0.23-2.3.10.jar +hive-shims-common/2.3.10//hive-shims-common-2.3.10.jar +hive-shims-scheduler/2.3.10//hive-shims-scheduler-2.3.10.jar +hive-shims/2.3.10//hive-shims-2.3.10.jar hive-storage-api/2.8.1//hive-storage-api-2.8.1.jar hk2-api/3.0.3//hk2-api-3.0.3.jar hk2-locator/3.0.3//hk2-locator-3.0.3.jar @@ -184,7 +183,7 @@ kubernetes-model-storageclass/6.12.1//kubernetes-model-storageclass-6.12.1.jar lapack/3.0.3//lapack-3.0.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.12.0//libthrift-0.12.0.jar +libthrift/0.16.0//libthrift-0.16.0.jar log4j-1.2-api/2.22.1//log4j-1.2-api-2.22.1.jar log4j-api/2.22.1//log4j-api-2.22.1.jar log4j-core/2.22.1//log4j-core-2.22.1.jar diff --git a/docs/building-spark.md b/docs/building-spark.md index 73fc31610d95d..8b04ac9b4a34f 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -85,9 +85,9 @@ Example: To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 2.3.9. +By default Spark will build with Hive 2.3.10. - # With Hive 2.3.9 support + # With Hive 2.3.10 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index b51cde53bd8fd..566dcb33a25d9 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used Property NameDefaultMeaningSince Version spark.sql.hive.metastore.version - 2.3.9 + 2.3.10 Version of the Hive metastore. Available - options are 2.0.0 through 2.3.9 and 3.0.0 through 3.1.3. + options are 2.0.0 through 2.3.10 and 3.0.0 through 3.1.3. 1.4.0 @@ -142,9 +142,9 @@ The following options can be used to configure the version of Hive that is used property can be one of four options:
  1. builtin
  2. - Use Hive 2.3.9, which is bundled with the Spark assembly when -Phive is + Use Hive 2.3.10, which is bundled with the Spark assembly when -Phive is enabled. When this option is chosen, spark.sql.hive.metastore.version must be - either 2.3.9 or not defined. + either 2.3.10 or not defined.
  3. maven
  4. Use Hive jars of specified version downloaded from Maven repositories. This configuration is not generally recommended for production deployments. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fa49d6402b180..a206ef81cdb98 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -1067,7 +1067,7 @@ Python UDF registration is unchanged. Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on built-in Hive, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.9 and 3.0.0 to 3.1.3. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 2.0.0 to 2.3.10 and 3.0.0 to 3.1.3. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses {:.no_toc} diff --git a/pom.xml b/pom.xml index f6f11d94cce32..aee3af7c3f6bf 100644 --- a/pom.xml +++ b/pom.xml @@ -132,8 +132,8 @@ org.apache.hive core - 2.3.9 - 2.3.9 + 2.3.10 + 2.3.10 2.3 @@ -192,8 +192,6 @@ 1.17.0 1.26.1 2.16.1 - - 2.6 3.14.0 @@ -206,7 +204,7 @@ 3.5.2 3.0.0 2.2.11 - 0.12.0 + 0.16.0 4.13.1 1.1 4.17.0 @@ -615,11 +613,6 @@ commons-text 1.12.0 - - commons-lang - commons-lang - ${commons-lang2.version} - commons-io commons-io @@ -2294,8 +2287,8 @@ janino - org.pentaho - pentaho-aggdesigner-algorithm + net.hydromatic + aggdesigner-algorithm @@ -2365,6 +2358,10 @@ org.codehaus.groovy groovy-all + + com.lmax + disruptor + @@ -2805,6 +2802,10 @@ org.slf4j slf4j-api + + javax.annotation + javax.annotation-api + @@ -2898,12 +2899,6 @@ hive-storage-api ${hive.storage.version} ${hive.storage.scope} - - - commons-lang - commons-lang - - commons-cli diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java index 175412ed98c6c..ef91f94eeec2b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java @@ -30,6 +30,7 @@ import org.apache.thrift.TProcessorFactory; import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; public final class KerberosSaslHelper { @@ -68,8 +69,8 @@ public static TTransport createSubjectAssumedTransport(String principal, new TSaslClientTransport("GSSAPI", null, names[0], names[1], saslProps, null, underlyingTransport); return new TSubjectAssumingTransport(saslTransport); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); + } catch (SaslException | TTransportException se) { + throw new IOException("Could not instantiate transport", se); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java index c06f6ec34653f..5ac29950f4f85 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java @@ -38,6 +38,7 @@ import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TSaslServerTransport; import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; public final class PlainSaslHelper { @@ -64,7 +65,7 @@ public static TTransportFactory getPlainTransportFactory(String authTypeStr) } public static TTransport getPlainTransport(String username, String password, - TTransport underlyingTransport) throws SaslException { + TTransport underlyingTransport) throws SaslException, TTransportException { return new TSaslClientTransport("PLAIN", null, null, null, new HashMap(), new PlainCallbackHandler(username, password), underlyingTransport); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java index 1205d21be6be6..b727b4e27de8d 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java @@ -45,11 +45,12 @@ public TSetIpAddressProcessor(Iface iface) { } @Override - public boolean process(final TProtocol in, final TProtocol out) throws TException { + public void process(final TProtocol in, final TProtocol out) throws TException { setIpAddress(in); setUserName(in); try { - return super.process(in, out); + super.process(in, out); + return; } finally { THREAD_LOCAL_USER_NAME.remove(); THREAD_LOCAL_IP_ADDRESS.remove(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index 4d99496876fdc..c7fa7b5f3e0ac 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -91,16 +91,10 @@ protected void initializeServer() { // Server args int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); - int requestTimeout = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); - int beBackoffSlotLength = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) .processorFactory(processorFactory).transportFactory(transportFactory) .protocolFactory(new TBinaryProtocol.Factory()) .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) - .requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) - .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) .executorService(executorService); // TCP Server diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 4b18e2950a3de..ac0a6d59bee8e 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -83,6 +83,16 @@ public void setSessionHandle(SessionHandle sessionHandle) { public SessionHandle getSessionHandle() { return sessionHandle; } + + @Override + public T unwrap(Class aClass) { + return null; + } + + @Override + public boolean isWrapperFor(Class aClass) { + return false; + } } public ThriftCLIService(CLIService service, String serviceName) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 101d31d609852..30201dcee552d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -74,7 +74,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + - "2.0.0 through 2.3.9 and " + + "2.0.0 through 2.3.10 and " + "3.0.0 through 3.1.3.") .version("1.4.0") .stringConf diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 1cd60c0d3fffe..130da78623b79 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -1358,7 +1358,7 @@ private[hive] object HiveClientImpl extends Logging { try { Hive.getWithoutRegisterFns(hiveConf) } catch { - // SPARK-37069: not all Hive versions have the above method (e.g., Hive 2.3.9 has it but + // SPARK-37069: not all Hive versions have the above method (e.g., Hive 2.3.10 has it but // 2.3.8 don't), therefore here we fallback when encountering the exception. case _: NoSuchMethodError => Hive.get(hiveConf) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 564c87a0fca8e..d172af21a9170 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -59,13 +59,12 @@ package object client { "org.pentaho:pentaho-aggdesigner-algorithm")) // Since HIVE-23980, calcite-core included in Hive package jar. - case object v2_3 extends HiveVersion("2.3.9", + case object v2_3 extends HiveVersion("2.3.10", exclusions = Seq("org.apache.calcite:calcite-core", "org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", - "com.fasterxml.jackson.core:*", "org.apache.curator:*", - "org.pentaho:pentaho-aggdesigner-algorithm", + "net.hydromatic:aggdesigner-algorithm", "org.apache.hive:hive-vector-code-gen")) // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 726341ffdf9e3..55dd9bf6efd50 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -211,7 +211,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tryDownloadSpark(version, sparkTestingDir.getCanonicalPath) } - // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.9 and Java 11. + // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.10 and Java 11. val hiveMetastoreVersion = """^\d+\.\d+""".r.findFirstIn(hiveVersion).get val args = Seq( "--name", "prepare testing tables", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index c7aa412959097..e88a37f019b7d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -149,7 +149,7 @@ class HiveSparkSubmitSuite "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", - "--conf", "spark.sql.hive.metastore.version=2.3.9", + "--conf", "spark.sql.hive.metastore.version=2.3.10", "--conf", "spark.sql.hive.metastore.jars=maven", "--driver-java-options", "-Dderby.system.durability=test", unusedJar.toString) @@ -370,7 +370,7 @@ class HiveSparkSubmitSuite "--master", "local-cluster[2,1,512]", "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", s"${LEGACY_TIME_PARSER_POLICY.key}=LEGACY", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.9", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.10", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"spark.hadoop.javax.jdo.option.ConnectionURL=$metastore", unusedJar.toString) @@ -387,7 +387,7 @@ object SetMetastoreURLTest extends Logging { val builder = SparkSession.builder() .config(sparkConf) .config(UI_ENABLED.key, "false") - .config(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.9") + .config(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.10") // The issue described in SPARK-16901 only appear when // spark.sql.hive.metastore.jars is not set to builtin. .config(HiveUtils.HIVE_METASTORE_JARS.key, "maven") @@ -698,7 +698,7 @@ object SparkSQLConfTest extends Logging { val filteredSettings = super.getAll.filterNot(e => isMetastoreSetting(e._1)) // Always add these two metastore settings at the beginning. - (HiveUtils.HIVE_METASTORE_VERSION.key -> "2.3.9") +: + (HiveUtils.HIVE_METASTORE_VERSION.key -> "2.3.10") +: (HiveUtils.HIVE_METASTORE_JARS.key -> "maven") +: filteredSettings } @@ -726,7 +726,7 @@ object SPARK_9757 extends QueryTest { val hiveWarehouseLocation = Utils.createTempDir() val sparkContext = new SparkContext( new SparkConf() - .set(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.9") + .set(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.10") .set(HiveUtils.HIVE_METASTORE_JARS.key, "maven") .set(UI_ENABLED, false) .set(WAREHOUSE_PATH.key, hiveWarehouseLocation.toString)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 397da6c18b50a..5e58959ca4f7d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1627,10 +1627,8 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("SPARK-33084: Add jar support Ivy URI in SQL") { val testData = TestHive.getHiveFile("data/files/sample.json").toURI withTable("t") { - // hive-catalog-core has some transitive dependencies which dont exist on maven central - // and hence cannot be found in the test environment or are non-jar (.pom) which cause - // failures in tests. Use transitive=false as it should be good enough to test the Ivy - // support in Hive ADD JAR + // Use transitive=false as it should be good enough to test the Ivy support + // in Hive ADD JAR sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion" + "?transitive=false") sql(