From 638a44db43a658941fc4cbb9b22e22f87028178f Mon Sep 17 00:00:00 2001 From: madlnu Date: Thu, 24 Oct 2024 02:43:54 +0530 Subject: [PATCH] [KYUUBI #6402]: engine.share.level=GROUP enable for a list of hadoop groups --- docs/configuration/settings.md | 1 + .../org/apache/kyuubi/config/KyuubiConf.scala | 11 ++++++++++ .../org/apache/kyuubi/engine/EngineRef.scala | 5 ++++- .../kyuubi/session/HadoopGroupProvider.scala | 22 +++++++++++++++++-- .../KyuubiOperationPerGroupSuite.scala | 18 ++++++++++++--- 5 files changed, 51 insertions(+), 6 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index a1b714d0d77..6d7fe046af5 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -509,6 +509,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | | kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | set | 1.6.0 | | kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | +| kyuubi.session.preferGroup | <undefined> | The hadoop group name for the group engine launch. This will be checked for the presence in the list of user's allowed groups. If present, it will take precedence for GROUP SHARE LEVEL execution. If this is not configured, the session will use the first group name from the list of groups as the primary group. | string | 1.9.3 | | kyuubi.session.proxy.user | <undefined> | An alternative to hive.server2.proxy.user. The current behavior is consistent with hive.server2.proxy.user and now only takes effect in RESTFul API. When both parameters are set, kyuubi.session.proxy.user takes precedence. | string | 1.9.0 | | kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | | kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. | boolean | 1.7.0 | diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 436ac5fd16b..64bf5b49eb9 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -2779,6 +2779,17 @@ object KyuubiConf { } .createWithDefault("hadoop") + val PREFERRED_GROUP: OptionalConfigEntry[String] = + buildConf("kyuubi.session.preferGroup") + .doc("The hadoop group name for the group engine launch. This will be checked " + + "for the presence in the list of user's allowed groups. If present, it will " + + "take precedence for GROUP SHARE LEVEL execution. If this is not configured, " + + "the session will use the first group name from the list of groups as the " + + "primary group.") + .version("1.9.3") + .stringConf + .createOptional + val SERVER_NAME: OptionalConfigEntry[String] = buildConf("kyuubi.server.name") .doc("The name of Kyuubi Server.") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index b7985fcf533..108434e8ad2 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -98,7 +98,10 @@ private[kyuubi] class EngineRef( } // user for launching engine - private[kyuubi] val appUser: String = if (doAsEnabled) routingUser else Utils.currentUser + private[kyuubi] val appUser: String = shareLevel match { + case GROUP => if (doAsEnabled) sessionUser else Utils.currentUser + case _ => if (doAsEnabled) routingUser else Utils.currentUser + } @VisibleForTesting private[kyuubi] val subdomain: String = conf.get(ENGINE_SHARE_LEVEL_SUBDOMAIN) match { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/HadoopGroupProvider.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/HadoopGroupProvider.scala index 2ae7bb157f1..8235c9d53eb 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/HadoopGroupProvider.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/HadoopGroupProvider.scala @@ -22,6 +22,7 @@ import java.util.{Map => JMap} import org.apache.hadoop.security.UserGroupInformation import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.plugin.GroupProvider /** @@ -29,8 +30,25 @@ import org.apache.kyuubi.plugin.GroupProvider * https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/GroupsMapping.html */ class HadoopGroupProvider extends GroupProvider with Logging { - override def primaryGroup(user: String, sessionConf: JMap[String, String]): String = - groups(user, sessionConf).head + override def primaryGroup(user: String, sessionConf: JMap[String, String]): String = { + val preferredGroup: Option[String] = if (sessionConf != null) { + Option(sessionConf.get(KyuubiConf.PREFERRED_GROUP.key)) + } else { + None + } + + val userGroups: Array[String] = groups(user, sessionConf) + + val primaryGroup = preferredGroup match { + case Some(group) if userGroups.contains(group) => group + case None => userGroups.headOption.getOrElse { + throw new NoSuchElementException("No groups available for the user") + } + case Some(group) => + throw new IllegalArgumentException(s"User is not part of the preferred group: $group") + } + primaryGroup + } override def groups(user: String, sessionConf: JMap[String, String]): Array[String] = UserGroupInformation.createRemoteUser(user).getGroupNames match { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerGroupSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerGroupSuite.scala index 173b4d2932a..72367bf9b41 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerGroupSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerGroupSuite.scala @@ -68,12 +68,24 @@ class KyuubiOperationPerGroupSuite extends WithKyuubiServer with SparkQueryTests } test("kyuubi defined function - system_user/session_user") { - withSessionConf(Map("hive.server2.proxy.user" -> "user1"))(Map.empty)(Map.empty) { + withSessionConf(Map("hive.server2.proxy.user" -> "user2"))(Map.empty)(Map.empty) { withJdbcStatement() { statement => val res = statement.executeQuery("select system_user() as c1, session_user() as c2") assert(res.next()) - assert(res.getString("c1") === "testGG") - assert(res.getString("c2") === "user1") + assert(res.getString("c1") === "user1") + assert(res.getString("c2") === "user2") + } + } + } + + test("ensure preferred group is chosen from list of groups") { + withSessionConf(Map("hive.server2.proxy.user" -> "user1"))(Map( + KyuubiConf.PREFERRED_GROUP.key -> "group_tt"))(Map.empty) { + withJdbcStatement() { statement => + val res = statement.executeQuery("set spark.app.name") + assert(res.next()) + val engineName = res.getString("value") + assert(engineName.startsWith(s"kyuubi_GROUP_${conf.get(KyuubiConf.ENGINE_TYPE)}_group_tt")) } } }