diff --git a/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkDevelopmentMode.java b/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkDevelopmentMode.java index 29c5b97274..efdd66c00b 100644 --- a/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkDevelopmentMode.java +++ b/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkDevelopmentMode.java @@ -20,7 +20,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; -/** The flink deployment mode enum. */ +/** The spark deployment mode enum. */ public enum SparkDevelopmentMode { /** Unknown type replace null */ @@ -29,8 +29,11 @@ public enum SparkDevelopmentMode { /** custom code */ CUSTOM_CODE("Custom Code", 1), - /** spark SQL */ - SPARK_SQL("Spark SQL", 2); + /** Spark SQL */ + SPARK_SQL("Spark SQL", 2), + + /** Py spark Mode */ + PYSPARK("Python Spark", 3); private final String name; @@ -44,17 +47,22 @@ public enum SparkDevelopmentMode { /** * Try to resolve the mode value into {@link SparkDevelopmentMode}. * - * @param value The mode value of potential flink deployment mode. - * @return The parsed flink deployment mode. + * @param value The mode value of potential spark deployment mode. + * @return The parsed spark deployment mode. */ @Nonnull public static SparkDevelopmentMode valueOf(@Nullable Integer value) { - for (SparkDevelopmentMode flinkDevelopmentMode : values()) { - if (flinkDevelopmentMode.mode.equals(value)) { - return flinkDevelopmentMode; + for (SparkDevelopmentMode sparkDevelopmentMode : values()) { + if (sparkDevelopmentMode.mode.equals(value)) { + return sparkDevelopmentMode; } } return SparkDevelopmentMode.UNKNOWN; } + /** Get the mode value of the current {@link SparkDevelopmentMode} enum. */ + @Nonnull + public Integer getMode() { + return mode; + } } diff --git a/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkSqlValidationFailedType.java b/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkSqlValidationFailedType.java new file mode 100644 index 0000000000..a26f884a3e --- /dev/null +++ b/streampark-common/src/main/java/org/apache/streampark/common/enums/SparkSqlValidationFailedType.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.common.enums; + +import javax.annotation.Nullable; + +/** Spark SQL validation failed type enum. */ +public enum SparkSqlValidationFailedType { + + /** Basic test failed (such as null, etc.) 
*/ + VERIFY_FAILED(1), + + /** syntax error */ + SYNTAX_ERROR(2), + + /** unsupported dialect */ + UNSUPPORTED_DIALECT(3), + + /** unsupported sql command */ + UNSUPPORTED_SQL(4), + + /** Not at the end of ";" */ + ENDS_WITH(5), + + /** Class exception */ + CLASS_ERROR(6); + + private final int failedType; + + SparkSqlValidationFailedType(int failedType) { + this.failedType = failedType; + } + + /** + * Try to resolve the given spark SQL validation failed type value into a known {@link + * SparkSqlValidationFailedType} enum. + */ + @Nullable + public static SparkSqlValidationFailedType of(Integer value) { + for (SparkSqlValidationFailedType type : values()) { + if (type.failedType == value) { + return type; + } + } + return null; + } + + public int getFailedType() { + return failedType; + } +} diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/conf/ConfigKeys.scala b/streampark-common/src/main/scala/org/apache/streampark/common/conf/ConfigKeys.scala index a7fd4b1194..7f321ee92c 100644 --- a/streampark-common/src/main/scala/org/apache/streampark/common/conf/ConfigKeys.scala +++ b/streampark-common/src/main/scala/org/apache/streampark/common/conf/ConfigKeys.scala @@ -65,6 +65,33 @@ object ConfigKeys { val KEY_SPARK_BATCH_DURATION = "spark.batch.duration" + val KEY_SPARK_DRIVER_CORES = "spark.driver.cores" + + val KEY_SPARK_DRIVER_MEMORY = "spark.driver.memory" + + val KEY_SPARK_EXECUTOR_INSTANCES = "spark.executor.instances" + + val KEY_SPARK_EXECUTOR_CORES = "spark.executor.cores" + + val KEY_SPARK_EXECUTOR_MEMORY = "spark.executor.memory" + + val KEY_SPARK_DYNAMIC_ALLOCATION_ENABLED = "spark.dynamicAllocation.enabled" + + val KEY_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS = "spark.dynamicAllocation.maxExecutors" + + val KEY_SPARK_YARN_QUEUE = "spark.yarn.queue" + + val KEY_SPARK_YARN_QUEUE_NAME = "yarnQueueName" + + val KEY_SPARK_YARN_QUEUE_LABEL = "yarnQueueLabel" + + val KEY_SPARK_YARN_AM_NODE_LABEL = "spark.yarn.am.nodeLabelExpression" + + val KEY_SPARK_YARN_EXECUTOR_NODE_LABEL = "spark.yarn.executor.nodeLabelExpression" + + def KEY_SPARK_SQL(prefix: String = null): String = + s"${Option(prefix).getOrElse("")}sql" + /** about config flink */ def KEY_APP_CONF(prefix: String = null): String = s"${Option(prefix).getOrElse("")}conf" @@ -87,7 +114,7 @@ object ConfigKeys { val KEY_FLINK_TABLE_PREFIX = "flink.table." - val KEY_SPARK_PROPERTY_PREFIX = "spark.property." + val KEY_SPARK_PROPERTY_PREFIX = "spark." val KEY_APP_PREFIX = "app." diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/util/PropertiesUtils.scala b/streampark-common/src/main/scala/org/apache/streampark/common/util/PropertiesUtils.scala index c5153e986d..9898ae9731 100644 --- a/streampark-common/src/main/scala/org/apache/streampark/common/util/PropertiesUtils.scala +++ b/streampark-common/src/main/scala/org/apache/streampark/common/util/PropertiesUtils.scala @@ -37,6 +37,12 @@ object PropertiesUtils extends Logger { private[this] lazy val PROPERTY_PATTERN = Pattern.compile("(.*?)=(.*?)") + private[this] lazy val SPARK_PROPERTY_COMPLEX_PATTERN = Pattern.compile("^[\"']?(.*?)=(.*?)[\"']?$") + + // scalastyle:off + private[this] lazy val SPARK_ARGUMENT_REGEXP = "\"?(\\s+|$)(?=(([^\"]*\"){2})*[^\"]*$)\"?" 
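+  // Descriptive note on the pattern above (a reading of the regex, not part of the original patch):
+  // it splits an argument string on whitespace that lies outside double-quoted sections
+  // (the lookahead requires an even number of '"' characters ahead), and consumes the
+  // quote adjacent to the split, so quoted arguments such as "a b" stay intact.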
+  // scalastyle:on
+
   private[this] lazy val MULTI_PROPERTY_REGEXP = "-D(.*?)\\s*=\\s*[\\\"|'](.*)[\\\"|']"

   private[this] lazy val MULTI_PROPERTY_PATTERN = Pattern.compile(MULTI_PROPERTY_REGEXP)

@@ -380,4 +386,48 @@ object PropertiesUtils extends Logger {
     new JavaHashMap[String, JavaMap[String, String]](map)
   }

+  /** extract spark configuration from sparkApplication.appProperties */
+  @Nonnull def extractSparkPropertiesAsJava(properties: String): JavaMap[String, String] =
+    new JavaHashMap[String, String](extractSparkProperties(properties))
+
+  @Nonnull def extractSparkProperties(properties: String): Map[String, String] = {
+    if (StringUtils.isEmpty(properties)) Map.empty[String, String]
+    else {
+      val map = mutable.Map[String, String]()
+      properties.split("(\\s)*(--conf|-c)(\\s)+") match {
+        case d if Utils.isNotEmpty(d) =>
+          d.foreach(x => {
+            if (x.nonEmpty) {
+              val p = SPARK_PROPERTY_COMPLEX_PATTERN.matcher(x)
+              if (p.matches) {
+                map += p.group(1).trim -> p.group(2).trim
+              }
+            }
+          })
+        case _ =>
+      }
+      map.toMap
+    }
+  }
+
+  /** extract spark arguments from sparkApplication.appArgs */
+  @Nonnull def extractSparkArgumentsAsJava(arguments: String): JavaList[String] =
+    new JavaArrayList[String](extractSparkArguments(arguments))
+
+  @Nonnull def extractSparkArguments(arguments: String): List[String] = {
+    if (StringUtils.isEmpty(arguments)) List.empty[String]
+    else {
+      // use a mutable buffer: appending to an immutable List with `:+` and discarding
+      // the result would always return an empty list
+      val list = mutable.ListBuffer[String]()
+      arguments.split(SPARK_ARGUMENT_REGEXP) match {
+        case d if Utils.isNotEmpty(d) =>
+          d.foreach(x => {
+            if (x.nonEmpty) {
+              list += x
+            }
+          })
+        case _ =>
+      }
+      list.toList
+    }
+  }
 }
diff --git a/streampark-console/streampark-console-service/src/main/assembly/script/schema/mysql-schema.sql b/streampark-console/streampark-console-service/src/main/assembly/script/schema/mysql-schema.sql
index 69abc8634d..b13800414f 100644
--- a/streampark-console/streampark-console-service/src/main/assembly/script/schema/mysql-schema.sql
+++ b/streampark-console/streampark-console-service/src/main/assembly/script/schema/mysql-schema.sql
@@ -567,65 +567,56 @@
 drop table if exists `t_spark_app`;
 create table `t_spark_app` (
   `id` bigint not null auto_increment,
   `team_id` bigint not null,
-  `job_type` tinyint default null,
-  `execution_mode` tinyint default null,
+  `job_type` tinyint default null comment '(1)custom code(2)spark SQL',
+  `app_type` tinyint default null comment '(1)Apache Spark(2)StreamPark Spark',
+  `version_id` bigint default null comment 'spark version',
+  `app_name` varchar(255) collate utf8mb4_general_ci default null comment 'spark.app.name',
+  `execution_mode` tinyint default null comment 'spark.submit.deployMode(1)cluster(2)client',
   `resource_from` tinyint default null,
   `project_id` bigint default null,
-  `job_name` varchar(255) collate utf8mb4_general_ci default null,
   `module` varchar(255) collate utf8mb4_general_ci default null,
+  `main_class` varchar(255) collate utf8mb4_general_ci default null comment 'The entry point for your application (e.g. 
org.apache.spark.examples.SparkPi)', `jar` varchar(255) collate utf8mb4_general_ci default null, `jar_check_sum` bigint default null, - `main_class` varchar(255) collate utf8mb4_general_ci default null, - `args` text collate utf8mb4_general_ci, - `options` text collate utf8mb4_general_ci, - `hot_params` text collate utf8mb4_general_ci, - `user_id` bigint default null, - `app_id` varchar(64) collate utf8mb4_general_ci default null, - `app_type` tinyint default null, - `duration` bigint default null, - `job_id` varchar(64) collate utf8mb4_general_ci default null, - `job_manager_url` varchar(255) collate utf8mb4_general_ci default null, - `version_id` bigint default null, - `cluster_id` varchar(45) collate utf8mb4_general_ci default null, - `k8s_name` varchar(63) collate utf8mb4_general_ci default null, - `k8s_namespace` varchar(63) collate utf8mb4_general_ci default null, - `spark_image` varchar(128) collate utf8mb4_general_ci default null, - `state` int default null, + `app_properties` text collate utf8mb4_general_ci comment 'Arbitrary Spark configuration property in key=value format (e.g. spark.driver.cores=1)', + `app_args` text collate utf8mb4_general_ci comment 'Arguments passed to the main method of your main class', + `app_id` varchar(64) collate utf8mb4_general_ci default null comment '(1)application_id on yarn(2)driver_pod_name on k8s', + `yarn_queue` varchar(128) collate utf8mb4_general_ci default null, + `k8s_master_url` varchar(128) collate utf8mb4_general_ci default null, + `k8s_container_image` varchar(128) collate utf8mb4_general_ci default null, + `k8s_image_pull_policy` tinyint default 1, + `k8s_service_account` varchar(64) collate utf8mb4_general_ci default null, + `k8s_namespace` varchar(64) collate utf8mb4_general_ci default null, + `hadoop_user` varchar(64) collate utf8mb4_general_ci default null, `restart_size` int default null, `restart_count` int default null, - `cp_threshold` int default null, - `cp_max_failure_interval` int default null, - `cp_failure_rate_interval` int default null, - `cp_failure_action` tinyint default null, - `dynamic_properties` text collate utf8mb4_general_ci, - `description` varchar(255) collate utf8mb4_general_ci default null, - `resolve_order` tinyint default null, - `k8s_rest_exposed_type` tinyint default null, - `jm_memory` int default null, - `tm_memory` int default null, - `total_task` int default null, - `total_tm` int default null, - `total_slot` int default null, - `available_slot` int default null, + `state` int default null, + `options` text collate utf8mb4_general_ci, `option_state` tinyint default null, - `tracking` tinyint default null, - `create_time` datetime default null comment 'create time', - `modify_time` datetime default null comment 'modify time', `option_time` datetime default null, + `user_id` bigint default null, + `description` varchar(255) collate utf8mb4_general_ci default null, + `tracking` tinyint default null, `release` tinyint default 1, `build` tinyint default 1, + `alert_id` bigint default null, + `create_time` datetime default null, + `modify_time` datetime default null, `start_time` datetime default null, `end_time` datetime default null, - `alert_id` bigint default null, - `k8s_pod_template` text collate utf8mb4_general_ci, - `k8s_jm_pod_template` text collate utf8mb4_general_ci, - `k8s_tm_pod_template` text collate utf8mb4_general_ci, - `k8s_hadoop_integration` tinyint default 0, - `spark_cluster_id` bigint default null, - `ingress_template` text collate utf8mb4_general_ci, - `default_mode_ingress` text 
collate utf8mb4_general_ci, - `tags` varchar(500) default null, - `hadoop_user` varchar(64) collate utf8mb4_general_ci default null, + `duration` bigint default null, + `tags` varchar(500) collate utf8mb4_general_ci default null, + `driver_cores` varchar(64) collate utf8mb4_general_ci default null, + `driver_memory` varchar(64) collate utf8mb4_general_ci default null, + `executor_cores` varchar(64) collate utf8mb4_general_ci default null, + `executor_memory` varchar(64) collate utf8mb4_general_ci default null, + `executor_max_nums` varchar(64) collate utf8mb4_general_ci default null, + `num_tasks` bigint default null, + `num_completed_tasks` bigint default null, + `num_stages` bigint default null, + `num_completed_stages` bigint default null, + `used_memory` bigint default null, + `used_v_cores` bigint default null, primary key (`id`) using btree, key `inx_job_type` (`job_type`) using btree, key `inx_track` (`tracking`) using btree, @@ -651,4 +642,69 @@ create table `t_spark_log` ( ) engine=innodb auto_increment=100000 default charset=utf8mb4 collate=utf8mb4_general_ci; +-- ---------------------------- +-- table structure for t_spark_effective +-- ---------------------------- +drop table if exists `t_spark_effective`; +create table `t_spark_effective` ( + `id` bigint not null auto_increment, + `app_id` bigint not null, + `target_type` tinyint not null comment '1) config 2) spark sql', + `target_id` bigint not null comment 'configid or sqlid', + `create_time` datetime default null comment 'create time', + primary key (`id`) using btree, + unique key `un_effective_inx` (`app_id`,`target_type`) using btree +) engine=innodb auto_increment=100000 default charset=utf8mb4 collate=utf8mb4_general_ci; + + +-- ---------------------------- +-- table structure for t_spark_config +-- ---------------------------- +drop table if exists `t_spark_config`; +create table `t_spark_config` ( + `id` bigint not null auto_increment, + `app_id` bigint not null, + `format` tinyint not null default 0, + `version` int not null, + `latest` tinyint not null default 0, + `content` text collate utf8mb4_general_ci not null, + `create_time` datetime default null comment 'create time', + primary key (`id`) using btree +) engine=innodb auto_increment=100000 default charset=utf8mb4 collate=utf8mb4_general_ci; + + +-- ---------------------------- +-- Table structure for t_spark_sql +-- ---------------------------- +drop table if exists `t_spark_sql`; +create table `t_spark_sql` ( + `id` bigint not null auto_increment, + `app_id` bigint default null, + `sql` text collate utf8mb4_general_ci, + `team_resource` varchar(64) collate utf8mb4_general_ci, + `dependency` text collate utf8mb4_general_ci, + `version` int default null, + `candidate` tinyint not null default 1, + `create_time` datetime default null comment 'create time', + primary key (`id`) using btree +) engine=innodb auto_increment=100000 default charset=utf8mb4 collate=utf8mb4_general_ci; + + +-- ---------------------------- +-- table structure for t_spark_app_backup +-- ---------------------------- +drop table if exists `t_spark_app_backup`; +create table `t_spark_app_backup` ( + `id` bigint not null auto_increment, + `app_id` bigint default null, + `sql_id` bigint default null, + `config_id` bigint default null, + `version` int default null, + `path` varchar(128) collate utf8mb4_general_ci default null, + `description` varchar(255) collate utf8mb4_general_ci default null, + `create_time` datetime default null comment 'create time', + primary key (`id`) using btree +) 
engine=innodb auto_increment=100000 default charset=utf8mb4 collate=utf8mb4_general_ci; + + set foreign_key_checks = 1; diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/aspect/AppChangeEventAspect.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/aspect/AppChangeEventAspect.java index ef8adce652..4c1a28e3f7 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/aspect/AppChangeEventAspect.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/aspect/AppChangeEventAspect.java @@ -17,7 +17,9 @@ package org.apache.streampark.console.core.aspect; +import org.apache.streampark.console.core.controller.SparkApplicationController; import org.apache.streampark.console.core.watcher.FlinkAppHttpWatcher; +import org.apache.streampark.console.core.watcher.SparkAppHttpWatcher; import lombok.extern.slf4j.Slf4j; import org.aspectj.lang.ProceedingJoinPoint; @@ -36,6 +38,9 @@ public class AppChangeEventAspect { @Autowired private FlinkAppHttpWatcher flinkAppHttpWatcher; + @Autowired + private SparkAppHttpWatcher sparkAppHttpWatcher; + @Pointcut("@annotation(org.apache.streampark.console.core.annotation.AppChangeEvent)") public void appChangeEventPointcut() { } @@ -45,7 +50,11 @@ public Object appChangeEvent(ProceedingJoinPoint joinPoint) throws Throwable { MethodSignature methodSignature = (MethodSignature) joinPoint.getSignature(); log.debug("appUpdated aspect, method:{}", methodSignature.getName()); Object target = joinPoint.proceed(); - flinkAppHttpWatcher.init(); + if (joinPoint.getTarget() instanceof SparkApplicationController) { + sparkAppHttpWatcher.init(); + } else { + flinkAppHttpWatcher.init(); + } return target; } diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/controller/SparkConfigController.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/controller/SparkConfigController.java new file mode 100644 index 0000000000..cc58bb2169 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/controller/SparkConfigController.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.console.core.controller; + +import org.apache.streampark.common.util.HadoopConfigUtils; +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.base.domain.RestResponse; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; +import org.apache.streampark.console.core.service.SparkApplicationConfigService; + +import org.apache.shiro.authz.annotation.RequiresPermissions; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.google.common.collect.ImmutableMap; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; +import java.util.Map; + +@Slf4j +@Validated +@RestController +@RequestMapping("spark/conf") +public class SparkConfigController { + + @Autowired + private SparkApplicationConfigService applicationConfigService; + + @PostMapping("get") + public RestResponse get(Long id) { + SparkApplicationConfig config = applicationConfigService.get(id); + return RestResponse.success(config); + } + + @PostMapping("template") + public RestResponse template() { + String config = applicationConfigService.readTemplate(); + return RestResponse.success(config); + } + + @PostMapping("list") + public RestResponse list(SparkApplicationConfig config, RestRequest request) { + IPage page = applicationConfigService.getPage(config, request); + return RestResponse.success(page); + } + + @PostMapping("history") + public RestResponse history(SparkApplication application) { + List history = applicationConfigService.list(application.getId()); + return RestResponse.success(history); + } + + @PostMapping("delete") + @RequiresPermissions("conf:delete") + public RestResponse delete(Long id) { + Boolean deleted = applicationConfigService.removeById(id); + return RestResponse.success(deleted); + } + + @PostMapping("sysHadoopConf") + @RequiresPermissions("app:create") + public RestResponse getSystemHadoopConfig() { + Map> result = ImmutableMap.of( + "hadoop", HadoopConfigUtils.readSystemHadoopConf(), + "hive", HadoopConfigUtils.readSystemHiveConf()); + return RestResponse.success(result); + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/controller/SparkSqlController.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/controller/SparkSqlController.java new file mode 100644 index 0000000000..961d1b0aed --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/controller/SparkSqlController.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.controller; + +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.base.domain.RestResponse; +import org.apache.streampark.console.base.exception.ApiAlertException; +import org.apache.streampark.console.base.exception.InternalException; +import org.apache.streampark.console.core.annotation.Permission; +import org.apache.streampark.console.core.entity.Application; +import org.apache.streampark.console.core.entity.SparkSql; +import org.apache.streampark.console.core.service.SparkSqlService; +import org.apache.streampark.console.core.service.SqlCompleteService; +import org.apache.streampark.console.core.service.VariableService; +import org.apache.streampark.spark.core.util.SparkSqlValidationResult; + +import org.apache.shiro.authz.annotation.RequiresPermissions; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import javax.validation.constraints.NotNull; + +import java.util.List; + +@Slf4j +@Validated +@RestController +@RequestMapping("spark/sql") +public class SparkSqlController { + + public static final String TYPE = "type"; + public static final String START = "start"; + public static final String END = "end"; + + @Autowired + private SparkSqlService sparkSqlService; + + @Autowired + private VariableService variableService; + + @Autowired + private SqlCompleteService sqlComplete; + + @PostMapping("verify") + public RestResponse verify(String sql, Long versionId, Long teamId) { + sql = variableService.replaceVariable(teamId, sql); + SparkSqlValidationResult sparkSqlValidationResult = sparkSqlService.verifySql(sql, versionId); + if (!sparkSqlValidationResult.success()) { + // record error type, such as error sql, reason and error start/end line + String exception = sparkSqlValidationResult.exception(); + RestResponse response = RestResponse.success() + .data(false) + .message(exception) + .put(TYPE, sparkSqlValidationResult.failedType().getFailedType()) + .put(START, sparkSqlValidationResult.lineStart()) + .put(END, sparkSqlValidationResult.lineEnd()); + + if (sparkSqlValidationResult.errorLine() > 0) { + response + .put(START, sparkSqlValidationResult.errorLine()) + .put(END, sparkSqlValidationResult.errorLine() + 1); + } + return response; + } + return RestResponse.success(true); + } + + @PostMapping("list") + @Permission(app = "#sparkSql.appId", team = "#sparkSql.teamId") + public RestResponse list(SparkSql sparkSql, RestRequest request) { + IPage page = sparkSqlService.getPage(sparkSql.getAppId(), request); + return RestResponse.success(page); + } + + @PostMapping("delete") + @RequiresPermissions("sql:delete") + @Permission(app = "#sparkSql.appId", team = "#sparkSql.teamId") + public RestResponse delete(SparkSql sparkSql) { + 
Boolean deleted = sparkSqlService.removeById(sparkSql.getSql()); + return RestResponse.success(deleted); + } + + @PostMapping("get") + @Permission(app = "#appId", team = "#teamId") + public RestResponse get(Long appId, Long teamId, String id) throws InternalException { + ApiAlertException.throwIfTrue( + appId == null || teamId == null, "Permission denied, appId and teamId cannot be null"); + String[] array = id.split(","); + SparkSql sparkSql1 = sparkSqlService.getById(array[0]); + sparkSql1.base64Encode(); + if (array.length == 1) { + return RestResponse.success(sparkSql1); + } + SparkSql sparkSql2 = sparkSqlService.getById(array[1]); + sparkSql2.base64Encode(); + return RestResponse.success(new SparkSql[]{sparkSql1, sparkSql2}); + } + + @PostMapping("history") + @Permission(app = "#app.id", team = "#app.teamId") + public RestResponse history(Application app) { + List sqlList = sparkSqlService.listSparkSqlHistory(app.getId()); + return RestResponse.success(sqlList); + } + + @PostMapping("sqlComplete") + public RestResponse getSqlComplete(@NotNull(message = "{required}") String sql) { + return RestResponse.success().put("word", sqlComplete.getComplete(sql)); + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplication.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplication.java index 8dad101fbc..965a0f2127 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplication.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplication.java @@ -21,7 +21,7 @@ import org.apache.streampark.common.conf.ConfigKeys; import org.apache.streampark.common.conf.Workspace; import org.apache.streampark.common.enums.ApplicationType; -import org.apache.streampark.common.enums.FlinkDevelopmentMode; +import org.apache.streampark.common.enums.SparkDevelopmentMode; import org.apache.streampark.common.enums.SparkExecutionMode; import org.apache.streampark.common.enums.StorageType; import org.apache.streampark.common.fs.FsOperator; @@ -29,16 +29,13 @@ import org.apache.streampark.console.base.util.JacksonUtils; import org.apache.streampark.console.core.bean.AppControl; import org.apache.streampark.console.core.bean.Dependency; -import org.apache.streampark.console.core.enums.FlinkAppStateEnum; import org.apache.streampark.console.core.enums.ReleaseStateEnum; import org.apache.streampark.console.core.enums.ResourceFromEnum; import org.apache.streampark.console.core.enums.SparkAppStateEnum; -import org.apache.streampark.console.core.metrics.flink.JobsOverview; +import org.apache.streampark.console.core.metrics.spark.SparkApplicationSummary; import org.apache.streampark.console.core.util.YarnQueueLabelExpression; -import org.apache.streampark.flink.kubernetes.model.K8sPodTemplates; import org.apache.streampark.flink.packer.maven.DependencyInfo; -import org.apache.commons.collections.MapUtils; import org.apache.commons.lang3.StringUtils; import com.baomidou.mybatisplus.annotation.FieldStrategy; @@ -49,16 +46,13 @@ import com.baomidou.mybatisplus.core.toolkit.support.SFunction; import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.Data; -import lombok.Getter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.Map; import 
java.util.Objects; -import java.util.Optional; @Data @TableName("t_spark_app") @@ -73,60 +67,78 @@ public class SparkApplication extends BaseEntity { /** 1) custom code 2) spark SQL */ private Integer jobType; + /** 1) Apache Spark 2) StreamPark Spark */ + private Integer appType; + + /** spark version */ + private Long versionId; + + /** spark.app.name */ + private String appName; + + private Integer executionMode; + + /** 1: cicd (build from csv) 2: upload (upload local jar job) */ + private Integer resourceFrom; + private Long projectId; - /** creator */ - private Long userId; - /** The name of the frontend and program displayed in yarn */ - private String jobName; + /** application module */ + private String module; - @TableField(updateStrategy = FieldStrategy.IGNORED) - private String appId; + private String mainClass; - @TableField(updateStrategy = FieldStrategy.IGNORED) - private String jobId; + private String jar; - /** The address of the jobmanager, that is, the direct access address of the Flink web UI */ - @TableField(updateStrategy = FieldStrategy.IGNORED) - private String jobManagerUrl; + /** + * for upload type tasks, checkSum needs to be recorded whether it needs to be republished after + * the update and modify. + */ + private Long jarCheckSum; - /** spark version */ - private Long versionId; + /** + * Arbitrary Spark configuration property in key=value format + * e.g. spark.driver.cores=1 + */ + private String appProperties; - /** k8s cluster id */ - private String clusterId; + /** Arguments passed to the main method of your main class */ + private String appArgs; - /** spark docker base image */ - private String sparkImage; + /** + * yarn application id for spark on Yarn. e.g. application_1722935916851_0014 + * driver pod name for spark on K8s.(will be supported in the future) + */ + @TableField(updateStrategy = FieldStrategy.IGNORED) + private String appId; - /** The resource name of the spark job on k8s, equivalent to clusterId in application mode. */ - private String k8sName; + private String yarnQueue; - /** k8s namespace */ - private String k8sNamespace = Constant.DEFAULT; + private transient String yarnQueueName; - /** The exposed type of the rest service of K8s(kubernetes.rest-service.exposed.type) */ - private Integer k8sRestExposedType; - /** spark kubernetes pod template */ - private String k8sPodTemplate; + /** + * spark on yarn can launch am and executors on particular nodes by configuring: + * "spark.yarn.am.nodeLabelExpression" and "spark.yarn.executor.nodeLabelExpression" + */ + private transient String yarnQueueLabel; - private String k8sJmPodTemplate; - private String k8sTmPodTemplate; + /** The api server url of k8s. 
*/ + private String k8sMasterUrl; - @Getter - private String ingressTemplate; - private String defaultModeIngress; + /** spark docker base image */ + private String k8sContainerImage; - /** spark-hadoop integration on spark-k8s mode */ - private Boolean k8sHadoopIntegration; + /** k8s image pull policy */ + private int k8sImagePullPolicy; - private Integer state; - /** task release status */ - @TableField("`release`") - private Integer release; + /** k8s spark service account */ + private String k8sServiceAccount; - /** determine if a task needs to be built */ - private Boolean build; + /** k8s namespace */ + private String k8sNamespace = Constant.DEFAULT; + + @TableField("HADOOP_USER") + private String hadoopUser; /** max restart retries after job failed */ @TableField(updateStrategy = FieldStrategy.IGNORED) @@ -135,113 +147,72 @@ public class SparkApplication extends BaseEntity { /** has restart count */ private Integer restartCount; - private Integer optionState; + private Integer state; - /** alert id */ - @TableField(updateStrategy = FieldStrategy.IGNORED) - private Long alertId; + private String options; - private String args; - /** application module */ - private String module; + private Integer optionState; - private String options; + private Date optionTime; - @TableField(updateStrategy = FieldStrategy.IGNORED) - private String hotParams; + private Long userId; - private Integer resolveOrder; - private Integer executionMode; - private String dynamicProperties; - private Integer appType; + private String description; /** determine if tracking status */ private Integer tracking; - private String jar; - - /** - * for upload type tasks, checkSum needs to be recorded whether it needs to be republished after - * the update and modify. - */ - private Long jarCheckSum; - - private String mainClass; + /** task release status */ + @TableField("`release`") + private Integer release; - private Date startTime; + /** determine if a task needs to be built */ + private Boolean build; + /** alert id */ @TableField(updateStrategy = FieldStrategy.IGNORED) - private Date endTime; - - private Long duration; - - /** checkpoint max failure interval */ - private Integer cpMaxFailureInterval; - - /** checkpoint failure rate interval */ - private Integer cpFailureRateInterval; + private Long alertId; - /** Actions triggered after X minutes failed Y times: 1: send alert 2: restart */ - private Integer cpFailureAction; + private Date createTime; - /** overview */ - @TableField("TOTAL_TM") - private Integer totalTM; + private Date modifyTime; - @TableField("HADOOP_USER") - private String hadoopUser; - - private Integer totalSlot; - private Integer availableSlot; - private Integer jmMemory; - private Integer tmMemory; - private Integer totalTask; + private Date startTime; - /** the cluster id bound to the task in remote mode */ @TableField(updateStrategy = FieldStrategy.IGNORED) - private Long sparkClusterId; - - private String description; - - private Date optionTime; + private Date endTime; - /** 1: cicd (build from csv) 2: upload (upload local jar job) */ - private Integer resourceFrom; + private Long duration; private String tags; - /** running job */ - private transient JobsOverview.Task overview; + /** scheduling */ + private String driverCores; + private String driverMemory; + private String executorCores; + private String executorMemory; + private String executorMaxNums; + + /** metrics of running job */ + private Long numTasks; + private Long numCompletedTasks; + private Long numStages; + private Long 
numCompletedStages; + private Long usedMemory; + private Long usedVCores; private transient String teamResource; private transient String dependency; private transient Long sqlId; private transient String sparkSql; - - private transient Integer[] stateArray; - private transient Integer[] jobTypeArray; private transient Boolean backUp = false; private transient Boolean restart = false; - private transient String userName; - private transient String nickName; private transient String config; private transient Long configId; private transient String sparkVersion; private transient String confPath; private transient Integer format; - private transient String savepointPath; - private transient Boolean restoreOrTriggerSavepoint = false; - private transient Boolean drain = false; - private transient Boolean nativeFormat = false; - private transient Long savepointTimeout = 60L; - private transient Boolean allowNonRestored = false; - private transient Integer restoreMode; - private transient String socketId; - private transient String projectName; - private transient String createTimeFrom; - private transient String createTimeTo; private transient String backUpDescription; - private transient String yarnQueue; /** spark Web UI Url */ private transient String sparkRestUrl; @@ -251,37 +222,50 @@ public class SparkApplication extends BaseEntity { private transient AppControl appControl; - public void setDefaultModeIngress(String defaultModeIngress) { - this.defaultModeIngress = defaultModeIngress; - } - public void setK8sNamespace(String k8sNamespace) { this.k8sNamespace = StringUtils.isBlank(k8sNamespace) ? Constant.DEFAULT : k8sNamespace; } - public K8sPodTemplates getK8sPodTemplates() { - return K8sPodTemplates.of(k8sPodTemplate, k8sJmPodTemplate, k8sTmPodTemplate); - } - public void setState(Integer state) { this.state = state; this.tracking = shouldTracking() ? 1 : 0; } - public void setYarnQueueByHotParams() { + public void resolveYarnQueue() { if (!(SparkExecutionMode.YARN_CLIENT == this.getSparkExecutionMode() || SparkExecutionMode.YARN_CLUSTER == this.getSparkExecutionMode())) { return; } + if (StringUtils.isBlank(this.yarnQueue)) { + this.yarnQueue = "default"; + } + Map queueLabelMap = YarnQueueLabelExpression.getQueueLabelMap(this.yarnQueue); + this.setYarnQueueName(queueLabelMap.getOrDefault(ConfigKeys.KEY_YARN_APP_QUEUE(), "default")); + this.setYarnQueueLabel(queueLabelMap.getOrDefault(ConfigKeys.KEY_YARN_APP_NODE_LABEL(), null)); + } - Map hotParamsMap = this.getHotParamsMap(); - if (MapUtils.isNotEmpty(hotParamsMap) - && hotParamsMap.containsKey(ConfigKeys.KEY_YARN_APP_QUEUE())) { - String yarnQueue = hotParamsMap.get(ConfigKeys.KEY_YARN_APP_QUEUE()).toString(); - String labelExpr = Optional.ofNullable(hotParamsMap.get(ConfigKeys.KEY_YARN_APP_NODE_LABEL())) - .map(Object::toString) - .orElse(null); - this.setYarnQueue(YarnQueueLabelExpression.of(yarnQueue, labelExpr).toString()); + /** + * Resolve the scheduling configuration of the Spark application. + * About executorMaxNums: + * 1) if dynamic allocation is disabled, it depends on "spark.executor.instances". + * 2) if dynamic allocation is enabled and "spark.dynamicAllocation.maxExecutors" is set, it depends on it. + * 3) if dynamic allocation is enabled and "spark.dynamicAllocation.maxExecutors" is not set, + * the number of executors can up to infinity. + * + * @param map The configuration map integrated with default configurations, + * configuration template and custom configurations. 
+ */ + public void resolveScheduleConf(Map map) { + this.setDriverCores(map.get(ConfigKeys.KEY_SPARK_DRIVER_CORES())); + this.setDriverMemory(map.get(ConfigKeys.KEY_SPARK_DRIVER_MEMORY())); + this.setExecutorCores(map.get(ConfigKeys.KEY_SPARK_EXECUTOR_CORES())); + this.setExecutorMemory(map.get(ConfigKeys.KEY_SPARK_EXECUTOR_MEMORY())); + boolean isDynamicAllocationEnabled = + Boolean.parseBoolean(map.get(ConfigKeys.KEY_SPARK_DYNAMIC_ALLOCATION_ENABLED())); + if (isDynamicAllocationEnabled) { + this.setExecutorMaxNums(map.getOrDefault(ConfigKeys.KEY_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS(), "inf")); + } else { + this.setExecutorMaxNums(map.get(ConfigKeys.KEY_SPARK_EXECUTOR_INSTANCES())); } } @@ -327,8 +311,8 @@ public ReleaseStateEnum getReleaseState() { } @JsonIgnore - public FlinkDevelopmentMode getDevelopmentMode() { - return FlinkDevelopmentMode.of(jobType); + public SparkDevelopmentMode getDevelopmentMode() { + return SparkDevelopmentMode.valueOf(jobType); } @JsonIgnore @@ -341,21 +325,6 @@ public SparkExecutionMode getSparkExecutionMode() { return SparkExecutionMode.of(executionMode); } - public boolean cpFailedTrigger() { - return this.cpMaxFailureInterval != null - && this.cpFailureRateInterval != null - && this.cpFailureAction != null; - } - - public boolean eqFlinkJob(SparkApplication other) { - if (this.isSparkSqlJob() - && other.isSparkSqlJob() - && this.getSparkSql().trim().equals(other.getSparkSql().trim())) { - return this.getDependencyObject().equals(other.getDependencyObject()); - } - return false; - } - /** Local compilation and packaging working directory */ @JsonIgnore public String getDistHome() { @@ -418,18 +387,23 @@ public Map getOptionMap() { @JsonIgnore public boolean isSparkSqlJob() { - return FlinkDevelopmentMode.FLINK_SQL.getMode().equals(this.getJobType()); + return SparkDevelopmentMode.SPARK_SQL.getMode().equals(this.getJobType()); } @JsonIgnore public boolean isCustomCodeJob() { - return FlinkDevelopmentMode.CUSTOM_CODE.getMode().equals(this.getJobType()); + return SparkDevelopmentMode.CUSTOM_CODE.getMode().equals(this.getJobType()); + } + + @JsonIgnore + public boolean isCustomCodeOrSparkSqlJob() { + return isSparkSqlJob() || isCustomCodeJob(); } @JsonIgnore public boolean isCustomCodeOrPySparkJob() { - return FlinkDevelopmentMode.CUSTOM_CODE.getMode().equals(this.getJobType()) - || FlinkDevelopmentMode.PYFLINK.getMode().equals(this.getJobType()); + return SparkDevelopmentMode.CUSTOM_CODE.getMode().equals(this.getJobType()) + || SparkDevelopmentMode.PYSPARK.getMode().equals(this.getJobType()); } @JsonIgnore @@ -461,7 +435,7 @@ public DependencyInfo getDependencyInfo() { @JsonIgnore public boolean isRunning() { - return FlinkAppStateEnum.RUNNING.getValue() == this.getState(); + return SparkAppStateEnum.RUNNING.getValue() == this.getState(); } @JsonIgnore @@ -505,40 +479,13 @@ public Workspace getWorkspace() { return Workspace.of(getStorageType()); } - @JsonIgnore - @SneakyThrows - @SuppressWarnings("unchecked") - public Map getHotParamsMap() { - if (StringUtils.isNotBlank(this.hotParams)) { - Map hotParamsMap = JacksonUtils.read(this.hotParams, Map.class); - hotParamsMap.entrySet().removeIf(entry -> entry.getValue() == null); - return hotParamsMap; - } - return Collections.EMPTY_MAP; - } - - @SneakyThrows - public void doSetHotParams() { - updateHotParams(this); - } - - @SneakyThrows - public void updateHotParams(SparkApplication appParam) { - if (appParam != this) { - this.hotParams = null; - } - SparkExecutionMode executionModeEnum = 
appParam.getSparkExecutionMode(); - Map hotParams = new HashMap<>(0); - if (needFillYarnQueueLabel(executionModeEnum)) { - hotParams.putAll(YarnQueueLabelExpression.getQueueLabelMap(appParam.getYarnQueue())); - } - if (MapUtils.isNotEmpty(hotParams)) { - this.setHotParams(JacksonUtils.write(hotParams)); - } - } - - private boolean needFillYarnQueueLabel(SparkExecutionMode mode) { - return SparkExecutionMode.YARN_CLUSTER == mode || SparkExecutionMode.YARN_CLIENT == mode; + public void fillRunningMetrics(SparkApplicationSummary summary) { + this.setNumTasks(summary.getNumTasks()); + this.setNumCompletedTasks(summary.getNumCompletedTasks()); + this.setNumStages(summary.getNumStages()); + this.setNumCompletedStages(summary.getNumCompletedStages()); + this.setUsedMemory(summary.getUsedMemory()); + this.setUsedVCores(summary.getUsedVCores()); } @Override @@ -560,19 +507,12 @@ public int hashCode() { public static class SFunc { public static final SFunction ID = SparkApplication::getId; - public static final SFunction JOB_ID = SparkApplication::getJobId; + public static final SFunction APP_ID = SparkApplication::getAppId; public static final SFunction START_TIME = SparkApplication::getStartTime; public static final SFunction END_TIME = SparkApplication::getEndTime; public static final SFunction DURATION = SparkApplication::getDuration; - public static final SFunction TOTAL_TASK = SparkApplication::getTotalTask; - public static final SFunction TOTAL_TM = SparkApplication::getTotalTM; - public static final SFunction TOTAL_SLOT = SparkApplication::getTotalSlot; - public static final SFunction JM_MEMORY = SparkApplication::getJmMemory; - public static final SFunction TM_MEMORY = SparkApplication::getTmMemory; public static final SFunction STATE = SparkApplication::getState; public static final SFunction OPTIONS = SparkApplication::getOptions; - public static final SFunction AVAILABLE_SLOT = SparkApplication::getAvailableSlot; public static final SFunction EXECUTION_MODE = SparkApplication::getExecutionMode; - public static final SFunction JOB_MANAGER_URL = SparkApplication::getJobManagerUrl; } } diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplicationBackUp.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplicationBackUp.java new file mode 100644 index 0000000000..e404e8e71c --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplicationBackUp.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.console.core.entity; + +import org.apache.streampark.common.conf.Workspace; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; + +import java.util.Date; + +@Data +@TableName("t_spark_app_backup") +@Slf4j +public class SparkApplicationBackUp { + + @TableId(type = IdType.AUTO) + private Long id; + + private Long appId; + + private Long configId; + + private Long sqlId; + + private String path; + + private String description; + /** version number at the backup */ + private Integer version; + + private Date createTime; + + private transient boolean backup; + + public SparkApplicationBackUp() { + } + + public SparkApplicationBackUp(SparkApplication application) { + this.appId = application.getId(); + this.sqlId = application.getSqlId(); + this.configId = application.getConfigId(); + this.description = application.getBackUpDescription(); + this.createTime = new Date(); + renderPath(application); + } + + private void renderPath(SparkApplication application) { + switch (application.getSparkExecutionMode()) { + case LOCAL: + this.path = String.format( + "%s/%d/%d", + Workspace.local().APP_BACKUPS(), application.getId(), createTime.getTime()); + break; + case YARN_CLUSTER: + case YARN_CLIENT: + this.path = String.format( + "%s/%d/%d", + Workspace.remote().APP_BACKUPS(), application.getId(), createTime.getTime()); + break; + default: + throw new UnsupportedOperationException( + "unsupported executionMode ".concat(application.getSparkExecutionMode().getName())); + } + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplicationConfig.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplicationConfig.java new file mode 100644 index 0000000000..dda55bd544 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkApplicationConfig.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.console.core.entity; + +import org.apache.streampark.common.util.DeflaterUtils; +import org.apache.streampark.common.util.PropertiesUtils; +import org.apache.streampark.console.core.enums.ConfigFileTypeEnum; + +import com.baomidou.mybatisplus.annotation.FieldStrategy; +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +import java.util.Base64; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +@Data +@TableName("t_spark_config") +@Slf4j +public class SparkApplicationConfig { + + @TableId(type = IdType.AUTO) + private Long id; + + private Long appId; + + /** + * 1)yaml
+ * 2)prop
+ * 3)hocon + */ + private Integer format; + + @TableField(updateStrategy = FieldStrategy.IGNORED) + private String content; + + /** default version: 1 */ + private Integer version = 1; + + /** record the configuration to take effect for the target */ + private Boolean latest; + + private Date createTime; + + private transient boolean effective = false; + + public void setToApplication(SparkApplication application) { + String unzipString = DeflaterUtils.unzipString(content); + String encode = Base64.getEncoder().encodeToString(unzipString.getBytes()); + application.setConfig(encode); + application.setConfigId(this.id); + application.setFormat(this.format); + } + + @Nullable + private Map readConfig() { + ConfigFileTypeEnum fileType = ConfigFileTypeEnum.of(this.format); + if (fileType == null) { + return null; + } + switch (fileType) { + case YAML: + return PropertiesUtils.fromYamlTextAsJava(DeflaterUtils.unzipString(this.content)); + case PROPERTIES: + return PropertiesUtils.fromPropertiesTextAsJava(DeflaterUtils.unzipString(this.content)); + case HOCON: + return PropertiesUtils.fromHoconTextAsJava(DeflaterUtils.unzipString(this.content)); + default: + return new HashMap<>(); + } + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkEffective.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkEffective.java new file mode 100644 index 0000000000..a85f60d6fe --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkEffective.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.entity; + +import org.apache.streampark.console.core.enums.EffectiveTypeEnum; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; + +import java.util.Date; + +@Data +@TableName("t_spark_effective") +@Slf4j +public class SparkEffective { + + @TableId(type = IdType.AUTO) + private Long id; + + private Long appId; + /** + * 1) config
+ * 2) spark Sql
+ */ + private Integer targetType; + + private Long targetId; + private Date createTime; + + private transient EffectiveTypeEnum effectiveType; +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkSql.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkSql.java new file mode 100644 index 0000000000..4ea78ea440 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/entity/SparkSql.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.entity; + +import org.apache.streampark.common.util.DeflaterUtils; +import org.apache.streampark.console.core.bean.Dependency; +import org.apache.streampark.console.core.enums.ChangeTypeEnum; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; + +import java.util.Base64; +import java.util.Date; +import java.util.Objects; + +@Data +@TableName("t_spark_sql") +public class SparkSql { + + @TableId(type = IdType.AUTO) + private Long id; + + private Long appId; + + @TableField("`sql`") + private String sql; + + private String teamResource; + + private String dependency; + + private Integer version = 1; + + /** + * candidate number: 0: none candidate
+ * 1: newly added record becomes a candidate
+ * 2: specific history becomes a candidate
+ */ + private Integer candidate; + + private Date createTime; + + private transient boolean effective = false; + /** sql diff */ + private transient boolean sqlDifference = false; + /** dependency diff */ + private transient boolean dependencyDifference = false; + + public SparkSql() { + } + + public SparkSql(SparkApplication application) { + this.appId = application.getId(); + this.sql = application.getSparkSql(); + this.teamResource = application.getTeamResource(); + this.dependency = application.getDependency(); + this.createTime = new Date(); + } + + public void decode() { + this.setSql(DeflaterUtils.unzipString(this.sql)); + } + + public void setToApplication(SparkApplication application) { + String encode = Base64.getEncoder().encodeToString(this.sql.getBytes()); + application.setSparkSql(encode); + application.setDependency(this.dependency); + application.setTeamResource(this.teamResource); + application.setSqlId(this.id); + } + + public ChangeTypeEnum checkChange(SparkSql target) { + // 1) determine if sql statement has changed + boolean isSqlChanged = !this.getSql().trim().equals(target.getSql().trim()); + // 2) determine if dependency has changed + Dependency thisDependency = Dependency.toDependency(this.getDependency()); + Dependency targetDependency = Dependency.toDependency(target.getDependency()); + boolean depDifference = !thisDependency.equals(targetDependency); + // 3) determine if team resource has changed + boolean teamResDifference = !Objects.equals(this.teamResource, target.getTeamResource()); + + if (isSqlChanged && depDifference && teamResDifference) { + return ChangeTypeEnum.ALL; + } + if (isSqlChanged) { + return ChangeTypeEnum.SQL; + } + if (depDifference) { + return ChangeTypeEnum.DEPENDENCY; + } + if (teamResDifference) { + return ChangeTypeEnum.TEAM_RESOURCE; + } + return ChangeTypeEnum.NONE; + } + + public void base64Encode() { + this.sql = Base64.getEncoder().encodeToString(DeflaterUtils.unzipString(this.sql).getBytes()); + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/enums/EffectiveTypeEnum.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/enums/EffectiveTypeEnum.java index b529ad9b13..b816d16e13 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/enums/EffectiveTypeEnum.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/enums/EffectiveTypeEnum.java @@ -25,7 +25,11 @@ public enum EffectiveTypeEnum { /** config */ CONFIG(1), /** FLINKSQL */ - FLINKSQL(2); + FLINKSQL(2), + /** spark config */ + SPARKCONFIG(3), + /** SPARKSQL */ + SPARKSQL(4); private final int type; diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationBackUpMapper.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationBackUpMapper.java new file mode 100644 index 0000000000..5bf946c826 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationBackUpMapper.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.mapper; + +import org.apache.streampark.console.core.entity.SparkApplicationBackUp; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +public interface SparkApplicationBackUpMapper extends BaseMapper<SparkApplicationBackUp> { +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationConfigMapper.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationConfigMapper.java new file mode 100644 index 0000000000..2644e79787 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationConfigMapper.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.streampark.console.core.mapper; + +import org.apache.streampark.console.core.entity.SparkApplicationConfig; + +import org.apache.ibatis.annotations.Param; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; + +public interface SparkApplicationConfigMapper extends BaseMapper<SparkApplicationConfig> { + + Integer selectLastVersion(@Param("appId") Long appId); + + SparkApplicationConfig selectEffective(@Param("appId") Long appId); + + SparkApplicationConfig selectLatest(@Param("appId") Long appId); + + IPage<SparkApplicationConfig> selectPageByAppId( + Page<SparkApplicationConfig> page, @Param("appId") Long appId); +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationMapper.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationMapper.java index 7ddfb00fd9..23dab27d37 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationMapper.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkApplicationMapper.java @@ -55,9 +55,4 @@ List selectRecentK8sClusterIds( void resetOptionState(); List selectAppsByProjectId(@Param("projectId") Long id); - - boolean existsRunningJobByClusterId(@Param("clusterId") Long clusterId); - - Integer countAffectedByClusterId( - @Param("clusterId") Long clusterId, @Param("dbType") String dbType); } diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkEffectiveMapper.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkEffectiveMapper.java new file mode 100644 index 0000000000..0520d0bb5a --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkEffectiveMapper.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.streampark.console.core.mapper; + +import org.apache.streampark.console.core.entity.SparkEffective; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +public interface SparkEffectiveMapper extends BaseMapper<SparkEffective> { +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkSqlMapper.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkSqlMapper.java new file mode 100644 index 0000000000..fab5b9d211 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/mapper/SparkSqlMapper.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.mapper; + +import org.apache.streampark.console.core.entity.SparkSql; + +import org.apache.ibatis.annotations.Param; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +import java.util.List; + +public interface SparkSqlMapper extends BaseMapper<SparkSql> { + + SparkSql getEffective(@Param("appId") Long appId); + + Integer getLatestVersion(@Param("appId") Long appId); + + List<SparkSql> selectSqlsByTeamId(@Param("teamId") Long teamId); +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/metrics/spark/SparkApplicationSummary.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/metrics/spark/SparkApplicationSummary.java new file mode 100644 index 0000000000..2c595ffb79 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/metrics/spark/SparkApplicationSummary.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.streampark.console.core.metrics.spark; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.io.Serializable; + +@Data +@AllArgsConstructor +public class SparkApplicationSummary implements Serializable { + + private Long numTasks; + private Long numCompletedTasks; + private Long numStages; + private Long numCompletedStages; + private Long usedMemory; + private Long usedVCores; +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkApplicationBackUpService.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkApplicationBackUpService.java new file mode 100644 index 0000000000..1c3f2f92ee --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkApplicationBackUpService.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.service; + +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.base.exception.InternalException; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationBackUp; +import org.apache.streampark.console.core.entity.SparkSql; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.service.IService; + +/** Interface representing a service for application backup operations. */ +public interface SparkApplicationBackUpService extends IService<SparkApplicationBackUp> { + + /** + * Deletes an object specified by the given ID. + * + * @param id The ID of the object to delete. + * @return true if the object was successfully deleted, false otherwise. + * @throws InternalException if an internal error occurs during the deletion process. + */ + Boolean removeById(Long id) throws InternalException; + + /** + * Performs a backup for the given application and Spark SQL parameters. + * + * @param appParam The application to back up. + * @param sparkSqlParam The Spark SQL to back up. + */ + void backup(SparkApplication appParam, SparkSql sparkSqlParam); + + /** + * Retrieves a page of {@link SparkApplicationBackUp} objects based on the provided parameters. + * + * @param bakParam The {@link SparkApplicationBackUp} object containing the search criteria. + * @param request The {@link RestRequest} object used for pagination and sorting. + * @return An {@link IPage} containing the retrieved {@link SparkApplicationBackUp} objects. + */ + IPage<SparkApplicationBackUp> getPage(SparkApplicationBackUp bakParam, RestRequest request); + + /** + * Rolls back the changes made by the specified application backup.
+ * + * @param bakParam The SparkApplicationBackUp object representing the backup to roll back. + */ + void rollback(SparkApplicationBackUp bakParam); + + /** + * Revoke the given application. + * + * @param appParam The application to be revoked. + */ + void revoke(SparkApplication appParam); + + /** + * Removes the specified application. + * + * @param appParam the application to be removed + */ + void remove(SparkApplication appParam); + + /** + * Rolls back a Spark SQL application to its previous state. + * + * @param appParam The application to roll back. + * @param sparkSqlParam The Spark SQL instance associated with the application. + */ + void rollbackSparkSql(SparkApplication appParam, SparkSql sparkSqlParam); +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkApplicationConfigService.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkApplicationConfigService.java new file mode 100644 index 0000000000..b289910d47 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkApplicationConfigService.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.service; + +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.service.IService; + +import java.util.List; + +/** This interface defines the methods to manage the application configuration. */ +public interface SparkApplicationConfigService extends IService<SparkApplicationConfig> { + + /** + * Creates a new instance of an Application. + * + * @param appParam The Application object to create. + * @param latest If set to true, sets the created Application as the latest version. + */ + void create(SparkApplication appParam, Boolean latest); + + /** + * Updates the given application. + * + * @param appParam the application to be updated + * @param latest a boolean indicating whether to update to the latest version + */ + void update(SparkApplication appParam, Boolean latest); + + /** + * Sets the latest or effective flag for a given configuration and application. The latest flag + * determines whether the configuration is the latest version available. The effective flag + * determines whether the configuration is effective for the application.
+ * + * @param latest a boolean value indicating whether the configuration is the latest version (true) + * or not (false) + * @param configId the ID of the configuration + * @param appId the ID of the application + */ + void setLatestOrEffective(Boolean latest, Long configId, Long appId); + + /** + * Sets the configuration to effective for the given application and configuration ID. + * + * @param appId The ID of the application + * @param configId The ID of the configuration + */ + void toEffective(Long appId, Long configId); + + /** + * Returns the latest version of the application configuration for the given application ID. + * + * @param appId The ID of the application + * @return The latest version of the application configuration + */ + SparkApplicationConfig getLatest(Long appId); + + /** + * Retrieves the effective ApplicationConfig for the given appId. + * + * @param appId The identifier of the application. + * @return The effective ApplicationConfig. + */ + SparkApplicationConfig getEffective(Long appId); + + /** + * Retrieves the ApplicationConfig for the specified ID. + * + * @param id the ID of the ApplicationConfig to retrieve + * @return the ApplicationConfig object corresponding to the specified ID, or null if no + * ApplicationConfig is found + */ + SparkApplicationConfig get(Long id); + + /** + * Retrieves a page of ApplicationConfig objects based on the specified ApplicationConfig and + * RestRequest. + * + * @param config the ApplicationConfig object to use as a filter for retrieving the page + * @param request the RestRequest object containing additional parameters and settings for + * retrieving the page + * @return an IPage containing the ApplicationConfig objects that match the filter criteria + * specified in the config object, limited by the settings in the request object + */ + IPage<SparkApplicationConfig> getPage(SparkApplicationConfig config, RestRequest request); + + /** + * Retrieves the history of application configurations for a given application. + * + * @param appId The application's id for which to retrieve the history. + * @return The list of application configurations representing the history. + */ + List<SparkApplicationConfig> list(Long appId); + + /** + * Reads a template from a file or a database. + * + * @return the content of the template as a String + */ + String readTemplate(); + + /** + * Removes the app with the specified appId. + * + * @param appId The id of the app to be removed. + */ + void removeByAppId(Long appId); +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkEffectiveService.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkEffectiveService.java new file mode 100644 index 0000000000..aa53cc1f92 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkEffectiveService.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.service; + +import org.apache.streampark.console.core.entity.SparkEffective; +import org.apache.streampark.console.core.enums.EffectiveTypeEnum; + +import com.baomidou.mybatisplus.extension.service.IService; + +public interface SparkEffectiveService extends IService<SparkEffective> { + + void remove(Long appId, EffectiveTypeEnum config); + + SparkEffective get(Long appId, EffectiveTypeEnum config); + + void saveOrUpdate(Long appId, EffectiveTypeEnum type, Long id); + + void removeByAppId(Long appId); +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkSqlService.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkSqlService.java new file mode 100644 index 0000000000..5cb640aa0f --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/SparkSqlService.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.service; + +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkSql; +import org.apache.streampark.console.core.enums.CandidateTypeEnum; +import org.apache.streampark.spark.core.util.SparkSqlValidationResult; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.service.IService; + +import java.util.List; + +/** This service is used to handle SQL submission tasks. */ +public interface SparkSqlService extends IService<SparkSql> { + + /** + * Create SparkSQL. + * + * @param sparkSql the SparkSql to create + */ + void create(SparkSql sparkSql); + + /** + * Set the candidate SQL for the given application.
+ * + * @param candidateTypeEnum CandidateTypeEnum + * @param appId application id + * @param sqlId SparkSQL id + */ + void setCandidate(CandidateTypeEnum candidateTypeEnum, Long appId, Long sqlId); + + /** + * Get the effective SparkSQL of the application. + * + * @param appId Application id + * @param decode Whether to decode the SQL content + * @return SparkSQL + */ + SparkSql getEffective(Long appId, boolean decode); + + /** + * Get the latest SparkSQL by application id. + * + * @param appId Application id + * @param decode Whether to decode the SQL content + * @return the latest SparkSQL + */ + SparkSql getLatestSparkSql(Long appId, boolean decode); + + /** + * Get all historical SparkSQL of the application. + * + * @param appId Application id + * @return list of historical SparkSQL + */ + List<SparkSql> listSparkSqlHistory(Long appId); + + /** + * Get SparkSQL by application id and candidate type. + * + * @param appId Application id + * @param type CandidateTypeEnum + * @return SparkSQL + */ + SparkSql getCandidate(Long appId, CandidateTypeEnum type); + + /** + * Set the specified SparkSQL to effective. + * + * @param appId Application id + * @param sqlId SparkSQL id + */ + void toEffective(Long appId, Long sqlId); + + /** + * Clean all candidates. + * + * @param id SparkSQL id + */ + void cleanCandidate(Long id); + + /** + * Remove SparkSQL by application id. + * + * @param appId Application id + */ + void removeByAppId(Long appId); + + /** + * Roll back the SparkSQL of the application. + * + * @param application SparkApplication + */ + void rollback(SparkApplication application); + + /** + * Verify whether the entered SQL is correct. + * + * @param sql SQL + * @param versionId SparkEnv version id + * @return SparkSqlValidationResult the result of the SQL check + */ + SparkSqlValidationResult verifySql(String sql, Long versionId); + + /** + * List all SparkSQL by team id. + * + * @param teamId SparkSQL team id + * @return list of SparkSQL + */ + List<SparkSql> listByTeamId(Long teamId); + + /** + * Retrieves a page of {@link SparkSql} objects based on the provided parameters. + * + * @param appId Application id + * @param request The {@link RestRequest} object used for pagination and sorting. + * @return An {@link IPage} containing the retrieved {@link SparkSql} objects. + */ + IPage<SparkSql> getPage(Long appId, RestRequest request); +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/SparkApplicationInfoService.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/SparkApplicationInfoService.java index 4100826c5e..27a05d116e 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/SparkApplicationInfoService.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/SparkApplicationInfoService.java @@ -45,15 +45,6 @@ public interface SparkApplicationInfoService extends IService */ boolean checkEnv(SparkApplication appParam) throws ApplicationException; - /** - * Checks the savepoint path for the given application. - * - * @param appParam the application to check the savepoint path for - * @return the check message - * @throws Exception if an error occurs while checking the savepoint path - */ - String checkSavepointPath(SparkApplication appParam) throws Exception; - /** * Checks if the given application meets the required alterations.
* @@ -136,34 +127,11 @@ public interface SparkApplicationInfoService extends IService List listRecentK8sNamespace(); /** - * Retrieves the list of recent K8s cluster IDs based on the specified execution mode. - * - * @param executionMode The execution mode to filter the recent K8s cluster IDs. 1: Production - * mode 2: Test mode 3: Development mode -1: All modes - * @return The list of recent K8s cluster IDs based on the specified execution mode. - */ - List listRecentK8sClusterId(Integer executionMode); - - /** - * Retrieves the recent K8s pod templates. - * - * @return a List of Strings representing the recent K8s pod templates. - */ - List listRecentK8sPodTemplate(); - - /** - * Retrieves the list of recent Kubernetes Job Manager Pod templates. - * - * @return A List of string values representing the recent Kubernetes Job Manager Pod templates. - */ - List listRecentK8sJmPodTemplate(); - - /** - * Retrieves the list of recent K8s TM pod templates. + * Retrieves the recent K8s container images * - * @return The list of recent K8s TM pod templates as a List of String objects. + * @return a List of Strings representing the recent K8s container images. */ - List listRecentK8sTmPodTemplate(); + List listRecentK8sContainerImage(); /** * Uploads a list of jars to the server for historical reference. diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationActionServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationActionServiceImpl.java index 8d828dc9c6..4a3591c827 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationActionServiceImpl.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationActionServiceImpl.java @@ -29,28 +29,27 @@ import org.apache.streampark.common.util.ExceptionUtils; import org.apache.streampark.common.util.HadoopUtils; import org.apache.streampark.common.util.PropertiesUtils; -import org.apache.streampark.common.util.YarnUtils; import org.apache.streampark.console.base.exception.ApiAlertException; import org.apache.streampark.console.base.exception.ApplicationException; import org.apache.streampark.console.core.entity.AppBuildPipeline; -import org.apache.streampark.console.core.entity.ApplicationConfig; -import org.apache.streampark.console.core.entity.FlinkSql; import org.apache.streampark.console.core.entity.Resource; import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; import org.apache.streampark.console.core.entity.SparkApplicationLog; import org.apache.streampark.console.core.entity.SparkEnv; +import org.apache.streampark.console.core.entity.SparkSql; import org.apache.streampark.console.core.enums.ConfigFileTypeEnum; import org.apache.streampark.console.core.enums.ReleaseStateEnum; import org.apache.streampark.console.core.enums.SparkAppStateEnum; import org.apache.streampark.console.core.enums.SparkOperationEnum; import org.apache.streampark.console.core.enums.SparkOptionStateEnum; import org.apache.streampark.console.core.mapper.SparkApplicationMapper; -import org.apache.streampark.console.core.service.AppBuildPipeService; -import org.apache.streampark.console.core.service.ApplicationConfigService; 
-import org.apache.streampark.console.core.service.FlinkSqlService; import org.apache.streampark.console.core.service.ResourceService; +import org.apache.streampark.console.core.service.SparkAppBuildPipeService; +import org.apache.streampark.console.core.service.SparkApplicationConfigService; import org.apache.streampark.console.core.service.SparkApplicationLogService; import org.apache.streampark.console.core.service.SparkEnvService; +import org.apache.streampark.console.core.service.SparkSqlService; import org.apache.streampark.console.core.service.VariableService; import org.apache.streampark.console.core.service.application.SparkApplicationActionService; import org.apache.streampark.console.core.service.application.SparkApplicationInfoService; @@ -65,9 +64,7 @@ import org.apache.streampark.spark.client.bean.SubmitResponse; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.api.common.JobID; import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.configuration.MemorySize; import org.apache.hadoop.service.Service.STATE; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -88,7 +85,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; @@ -113,7 +109,7 @@ public class SparkApplicationActionServiceImpl private SparkApplicationInfoService applicationInfoService; @Autowired - private ApplicationConfigService configService; + private SparkApplicationConfigService configService; @Autowired private SparkApplicationLogService applicationLogService; @@ -122,10 +118,10 @@ public class SparkApplicationActionServiceImpl private SparkEnvService sparkEnvService; @Autowired - private FlinkSqlService flinkSqlService; + private SparkSqlService sparkSqlService; @Autowired - private AppBuildPipeService appBuildPipeService; + private SparkAppBuildPipeService appBuildPipeService; @Autowired private VariableService variableService; @@ -194,9 +190,8 @@ public void stop(SparkApplication appParam) throws Exception { SparkApplicationLog applicationLog = new SparkApplicationLog(); applicationLog.setOptionName(SparkOperationEnum.STOP.getValue()); applicationLog.setAppId(application.getId()); - applicationLog.setTrackUrl(application.getJobManagerUrl()); applicationLog.setOptionTime(new Date()); - applicationLog.setSparkAppId(application.getJobId()); + applicationLog.setSparkAppId(application.getAppId()); applicationLog.setUserId(ServiceHelper.getUserId()); application.setOptionTime(new Date()); this.baseMapper.updateById(application); @@ -216,7 +211,7 @@ public void stop(SparkApplication appParam) throws Exception { sparkEnv.getSparkVersion(), SparkExecutionMode.of(application.getExecutionMode()), stopProper, - application.getJobId()); + application.getAppId()); CompletableFuture stopFuture = CompletableFuture.supplyAsync(() -> SparkClient.stop(stopRequest), executorService); @@ -279,7 +274,6 @@ public void start(SparkApplication appParam, boolean auto) throws Exception { // 2) update app state to starting... 
starting(application); - String jobId = new JobID().toHexString(); SparkApplicationLog applicationLog = new SparkApplicationLog(); applicationLog.setOptionName(SparkOperationEnum.START.getValue()); applicationLog.setAppId(application.getId()); @@ -291,39 +285,43 @@ public void start(SparkApplication appParam, boolean auto) throws Exception { Map extraParameter = new HashMap<>(0); if (application.isSparkSqlJob()) { - FlinkSql flinkSql = flinkSqlService.getEffective(application.getId(), true); + SparkSql sparkSql = sparkSqlService.getEffective(application.getId(), true); // Get the sql of the replaced placeholder - String realSql = variableService.replaceVariable(application.getTeamId(), flinkSql.getSql()); - flinkSql.setSql(DeflaterUtils.zipString(realSql)); - extraParameter.put(ConfigKeys.KEY_FLINK_SQL(null), flinkSql.getSql()); + String realSql = variableService.replaceVariable(application.getTeamId(), sparkSql.getSql()); + sparkSql.setSql(DeflaterUtils.zipString(realSql)); + extraParameter.put(ConfigKeys.KEY_SPARK_SQL(null), sparkSql.getSql()); } Tuple2 userJarAndAppConf = getUserJarAndAppConf(sparkEnv, application); - String flinkUserJar = userJarAndAppConf.f0; + String sparkUserJar = userJarAndAppConf.f0; String appConf = userJarAndAppConf.f1; BuildResult buildResult = buildPipeline.getBuildResult(); - if (SparkExecutionMode.YARN_CLUSTER == application.getSparkExecutionMode() - || SparkExecutionMode.YARN_CLIENT == application.getSparkExecutionMode()) { - buildResult = new ShadedBuildResponse(null, flinkUserJar, true); - application.setJobManagerUrl(YarnUtils.getRMWebAppURL(true)); + if (SparkExecutionMode.isYarnMode(application.getSparkExecutionMode())) { + buildResult = new ShadedBuildResponse(null, sparkUserJar, true); + if (StringUtils.isNotBlank(application.getYarnQueueName())) { + extraParameter.put(ConfigKeys.KEY_SPARK_YARN_QUEUE_NAME(), application.getYarnQueueName()); + } + if (StringUtils.isNotBlank(application.getYarnQueueLabel())) { + extraParameter.put(ConfigKeys.KEY_SPARK_YARN_QUEUE_LABEL(), application.getYarnQueueLabel()); + } } // Get the args after placeholder replacement - String applicationArgs = variableService.replaceVariable(application.getTeamId(), application.getArgs()); + String applicationArgs = variableService.replaceVariable(application.getTeamId(), application.getAppArgs()); SubmitRequest submitRequest = new SubmitRequest( sparkEnv.getSparkVersion(), SparkExecutionMode.of(application.getExecutionMode()), - getProperties(application), sparkEnv.getSparkConf(), SparkDevelopmentMode.valueOf(application.getJobType()), application.getId(), - jobId, - application.getJobName(), + application.getAppName(), + application.getMainClass(), appConf, + PropertiesUtils.extractSparkPropertiesAsJava(application.getAppProperties()), + PropertiesUtils.extractSparkArgumentsAsJava(applicationArgs), application.getApplicationType(), - applicationArgs, application.getHadoopUser(), buildResult, extraParameter); @@ -357,22 +355,11 @@ public void start(SparkApplication appParam, boolean auto) throws Exception { // 3) success applicationLog.setSuccess(true); - // TODO:修改为spark对应的参数 - if (response.sparkConfig() != null) { - String jmMemory = response.sparkConfig().get(ConfigKeys.KEY_FLINK_JM_PROCESS_MEMORY()); - if (jmMemory != null) { - application.setJmMemory(MemorySize.parse(jmMemory).getMebiBytes()); - } - String tmMemory = response.sparkConfig().get(ConfigKeys.KEY_FLINK_TM_PROCESS_MEMORY()); - if (tmMemory != null) { - 
application.setTmMemory(MemorySize.parse(tmMemory).getMebiBytes()); - } - } - application.setAppId(response.clusterId()); - if (StringUtils.isNoneEmpty(response.clusterId())) { - application.setJobId(response.clusterId()); + application.resolveScheduleConf(response.sparkProperties()); + if (StringUtils.isNoneEmpty(response.sparkAppId())) { + application.setAppId(response.sparkAppId()); } - applicationLog.setSparkAppId(response.clusterId()); + applicationLog.setSparkAppId(response.sparkAppId()); application.setStartTime(new Date()); application.setEndTime(null); @@ -421,18 +408,18 @@ private void starting(SparkApplication application) { private Tuple2 getUserJarAndAppConf( SparkEnv sparkEnv, SparkApplication application) { SparkExecutionMode executionModeEnum = application.getSparkExecutionMode(); - ApplicationConfig applicationConfig = configService.getEffective(application.getId()); + SparkApplicationConfig applicationConfig = configService.getEffective(application.getId()); ApiAlertException.throwIfNull( executionModeEnum, "ExecutionMode can't be null, start application failed."); - String flinkUserJar = null; + String sparkUserJar = null; String appConf = null; switch (application.getDevelopmentMode()) { - case FLINK_SQL: - FlinkSql flinkSql = flinkSqlService.getEffective(application.getId(), false); - AssertUtils.notNull(flinkSql); + case SPARK_SQL: + SparkSql sparkSql = sparkSqlService.getEffective(application.getId(), false); + AssertUtils.notNull(sparkSql); // 1) dist_userJar String sqlDistJar = ServiceHelper.getSparkSqlClientJar(sparkEnv); // 2) appConfig @@ -442,11 +429,14 @@ private Tuple2 getUserJarAndAppConf( // 3) client if (SparkExecutionMode.YARN_CLUSTER == executionModeEnum) { String clientPath = Workspace.remote().APP_CLIENT(); - flinkUserJar = String.format("%s/%s", clientPath, sqlDistJar); + sparkUserJar = String.format("%s/%s", clientPath, sqlDistJar); } break; - case PYFLINK: + case PYSPARK: + appConf = applicationConfig == null + ? null + : String.format("yaml://%s", applicationConfig.getContent()); Resource resource = resourceService.findByResourceName(application.getTeamId(), application.getJar()); ApiAlertException.throwIfNull( @@ -459,14 +449,14 @@ private Tuple2 getUserJarAndAppConf( resource.getFilePath().endsWith(Constant.PYTHON_SUFFIX), "pyflink format error, must be a \".py\" suffix, start application failed."); - flinkUserJar = resource.getFilePath(); + sparkUserJar = resource.getFilePath(); break; case CUSTOM_CODE: if (application.isUploadJob()) { - appConf = String.format( - "json://{\"%s\":\"%s\"}", - ConfigKeys.KEY_FLINK_APPLICATION_MAIN_CLASS(), application.getMainClass()); + appConf = applicationConfig == null + ? null + : String.format("yaml://%s", applicationConfig.getContent()); } else { switch (application.getApplicationType()) { case STREAMPARK_SPARK: @@ -486,25 +476,25 @@ private Tuple2 getUserJarAndAppConf( break; default: throw new IllegalArgumentException( - "[StreamPark] ApplicationType must be (StreamPark flink | Apache flink)... "); + "[StreamPark] ApplicationType must be (StreamPark spark | Apache spark)... 
"); } } if (SparkExecutionMode.YARN_CLUSTER == executionModeEnum) { switch (application.getApplicationType()) { case STREAMPARK_SPARK: - flinkUserJar = String.format( + sparkUserJar = String.format( "%s/%s", application.getAppLib(), application.getModule().concat(Constant.JAR_SUFFIX)); break; case APACHE_SPARK: - flinkUserJar = String.format("%s/%s", application.getAppHome(), application.getJar()); - if (!FsOperator.hdfs().exists(flinkUserJar)) { + sparkUserJar = String.format("%s/%s", application.getAppHome(), application.getJar()); + if (!FsOperator.hdfs().exists(sparkUserJar)) { resource = resourceService.findByResourceName( application.getTeamId(), application.getJar()); if (resource != null && StringUtils.isNotBlank(resource.getFilePath())) { - flinkUserJar = String.format( + sparkUserJar = String.format( "%s/%s", application.getAppHome(), new File(resource.getFilePath()).getName()); @@ -513,29 +503,12 @@ private Tuple2 getUserJarAndAppConf( break; default: throw new IllegalArgumentException( - "[StreamPark] ApplicationType must be (StreamPark flink | Apache flink)... "); + "[StreamPark] ApplicationType must be (StreamPark spark | Apache spark)... "); } } break; } - return Tuple2.of(flinkUserJar, appConf); - } - - private Map getProperties(SparkApplication application) { - Map properties = new HashMap<>(application.getOptionMap()); - if (SparkExecutionMode.isYarnMode(application.getSparkExecutionMode())) { - String yarnQueue = (String) application.getHotParamsMap().get(ConfigKeys.KEY_YARN_APP_QUEUE()); - String yarnLabelExpr = (String) application.getHotParamsMap().get(ConfigKeys.KEY_YARN_APP_NODE_LABEL()); - Optional.ofNullable(yarnQueue) - .ifPresent(yq -> properties.put(ConfigKeys.KEY_YARN_APP_QUEUE(), yq)); - Optional.ofNullable(yarnLabelExpr) - .ifPresent(yLabel -> properties.put(ConfigKeys.KEY_YARN_APP_NODE_LABEL(), yLabel)); - } - - Map dynamicProperties = PropertiesUtils - .extractDynamicPropertiesAsJava(application.getDynamicProperties()); - properties.putAll(dynamicProperties); - return properties; + return Tuple2.of(sparkUserJar, appConf); } private void doStopped(Long id) { @@ -549,7 +522,7 @@ private void doStopped(Long id) { if (SparkExecutionMode.isYarnMode(application.getSparkExecutionMode())) { try { List applications = applicationInfoService - .getYarnAppReport(application.getJobName()); + .getYarnAppReport(application.getAppName()); if (!applications.isEmpty()) { YarnClient yarnClient = HadoopUtils.yarnClient(); yarnClient.killApplication(applications.get(0).getApplicationId()); @@ -566,7 +539,7 @@ private void checkYarnBeforeStart(SparkApplication application) { yarnState == STARTED, "[StreamPark] The yarn cluster service state is " + yarnState.name() + ", please check it"); ApiAlertException.throwIfTrue( - !applicationInfoService.getYarnAppReport(application.getJobName()).isEmpty(), + !applicationInfoService.getYarnAppReport(application.getAppName()).isEmpty(), "[StreamPark] The same task name is already running in the yarn queue"); } } diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationInfoServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationInfoServiceImpl.java index 01300ad6f2..4b6398e041 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationInfoServiceImpl.java +++ 
b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationInfoServiceImpl.java @@ -31,7 +31,6 @@ import org.apache.streampark.console.core.entity.SparkApplication; import org.apache.streampark.console.core.entity.SparkEnv; import org.apache.streampark.console.core.enums.AppExistsStateEnum; -import org.apache.streampark.console.core.enums.FlinkAppStateEnum; import org.apache.streampark.console.core.enums.SparkAppStateEnum; import org.apache.streampark.console.core.mapper.SparkApplicationMapper; import org.apache.streampark.console.core.runner.EnvInitializer; @@ -52,10 +51,11 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import javax.annotation.Nonnull; + import java.io.File; import java.io.IOException; import java.io.Serializable; -import java.net.URI; import java.util.Arrays; import java.util.Base64; import java.util.Comparator; @@ -79,7 +79,7 @@ public class SparkApplicationInfoServiceImpl private static final int DEFAULT_HISTORY_RECORD_LIMIT = 25; - private static final int DEFAULT_HISTORY_POD_TMPL_RECORD_LIMIT = 5; + private static final int DEFAULT_HISTORY_CONTAINER_IMAGE_RECORD_LIMIT = 5; private static final Pattern JOB_NAME_PATTERN = Pattern.compile("^[.\\x{4e00}-\\x{9fa5}A-Za-z\\d_\\-\\s]+$"); @@ -95,9 +95,64 @@ public class SparkApplicationInfoServiceImpl public Map getDashboardDataMap(Long teamId) { // result json + Long totalNumTasks = 0L; + Long totalNumCompletedTasks = 0L; + Long totalNumStages = 0L; + Long totalNumCompletedStages = 0L; + Long totalUsedMemory = 0L; + Long totalUsedVCores = 0L; + Integer runningApplication = 0; + + for (SparkApplication app : SparkAppHttpWatcher.getWatchingApps()) { + if (!teamId.equals(app.getTeamId())) { + continue; + } + if (app.getState() == SparkAppStateEnum.RUNNING.getValue()) { + runningApplication++; + } + if (app.getNumTasks() != null) { + totalNumTasks += app.getNumTasks(); + } + if (app.getNumCompletedTasks() != null) { + totalNumCompletedTasks += app.getNumCompletedTasks(); + } + if (app.getNumStages() != null) { + totalNumStages += app.getNumStages(); + } + if (app.getNumCompletedStages() != null) { + totalNumCompletedStages += app.getNumCompletedStages(); + } + if (app.getUsedMemory() != null) { + totalUsedMemory += app.getUsedMemory(); + } + if (app.getUsedVCores() != null) { + totalUsedVCores += app.getUsedVCores(); + } + } + + // result json + return constructDashboardMap( + runningApplication, totalNumTasks, totalNumCompletedTasks, totalNumStages, totalNumCompletedStages, + totalUsedMemory, totalUsedVCores); + } + + @Nonnull + private Map constructDashboardMap( + Integer runningApplication, + Long totalNumTasks, + Long totalNumCompletedTasks, + Long totalNumStages, + Long totalNumCompletedStages, + Long totalUsedMemory, + Long totalUsedVCores) { Map dashboardDataMap = new HashMap<>(8); - // TODO: Tasks running metrics for presentation - // dashboardDataMap.put("metrics key", "metrics value"); + dashboardDataMap.put("runningApplication", runningApplication); + dashboardDataMap.put("numTasks", totalNumTasks); + dashboardDataMap.put("numCompletedTasks", totalNumCompletedTasks); + dashboardDataMap.put("numStages", totalNumStages); + dashboardDataMap.put("numCompletedStages", totalNumCompletedStages); + dashboardDataMap.put("usedMemory", totalUsedMemory); + dashboardDataMap.put("usedVCores", totalUsedVCores); return dashboardDataMap; } @@ -159,23 +214,8 @@ public List 
listRecentK8sNamespace() { } @Override - public List listRecentK8sClusterId(Integer executionMode) { - return baseMapper.selectRecentK8sClusterIds(executionMode, DEFAULT_HISTORY_RECORD_LIMIT); - } - - @Override - public List listRecentK8sPodTemplate() { - return baseMapper.selectRecentK8sPodTemplates(DEFAULT_HISTORY_POD_TMPL_RECORD_LIMIT); - } - - @Override - public List listRecentK8sJmPodTemplate() { - return baseMapper.selectRecentK8sJmPodTemplates(DEFAULT_HISTORY_POD_TMPL_RECORD_LIMIT); - } - - @Override - public List listRecentK8sTmPodTemplate() { - return baseMapper.selectRecentK8sTmPodTemplates(DEFAULT_HISTORY_POD_TMPL_RECORD_LIMIT); + public List listRecentK8sContainerImage() { + return baseMapper.selectRecentK8sPodTemplates(DEFAULT_HISTORY_CONTAINER_IMAGE_RECORD_LIMIT); } @Override @@ -196,7 +236,7 @@ public AppExistsStateEnum checkStart(Long id) { return AppExistsStateEnum.INVALID; } if (SparkExecutionMode.isYarnMode(application.getExecutionMode())) { - boolean exists = !getYarnAppReport(application.getJobName()).isEmpty(); + boolean exists = !getYarnAppReport(application.getAppName()).isEmpty(); return exists ? AppExistsStateEnum.IN_YARN : AppExistsStateEnum.NO; } // todo on k8s check... @@ -244,15 +284,15 @@ public String getYarnName(String appConfig) { @Override public AppExistsStateEnum checkExists(SparkApplication appParam) { - if (!checkJobName(appParam.getJobName())) { + if (!checkJobName(appParam.getAppName())) { return AppExistsStateEnum.INVALID; } - boolean existsByJobName = this.existsByJobName(appParam.getJobName()); + boolean existsByJobName = this.existsByAppName(appParam.getAppName()); if (appParam.getId() != null) { SparkApplication app = getById(appParam.getId()); - if (app.getJobName().equals(appParam.getJobName())) { + if (app.getAppName().equals(appParam.getAppName())) { return AppExistsStateEnum.NO; } @@ -261,10 +301,10 @@ public AppExistsStateEnum checkExists(SparkApplication appParam) { } // has stopped status - if (FlinkAppStateEnum.isEndState(app.getState())) { + if (SparkAppStateEnum.isEndState(app.getState())) { // check whether jobName exists on yarn if (SparkExecutionMode.isYarnMode(appParam.getExecutionMode()) - && YarnUtils.isContains(appParam.getJobName())) { + && YarnUtils.isContains(appParam.getAppName())) { return AppExistsStateEnum.IN_YARN; } } @@ -275,16 +315,16 @@ public AppExistsStateEnum checkExists(SparkApplication appParam) { // check whether jobName exists on yarn if (SparkExecutionMode.isYarnMode(appParam.getExecutionMode()) - && YarnUtils.isContains(appParam.getJobName())) { + && YarnUtils.isContains(appParam.getAppName())) { return AppExistsStateEnum.IN_YARN; } } return AppExistsStateEnum.NO; } - private boolean existsByJobName(String jobName) { + private boolean existsByAppName(String jobName) { return baseMapper.exists( - new LambdaQueryWrapper().eq(SparkApplication::getJobName, jobName)); + new LambdaQueryWrapper().eq(SparkApplication::getAppName, jobName)); } @Override @@ -303,37 +343,6 @@ public String getMain(SparkApplication appParam) { return Utils.getJarManClass(jarFile); } - @Override - public String checkSavepointPath(SparkApplication appParam) throws Exception { - String savepointPath = appParam.getSavepointPath(); - if (StringUtils.isBlank(savepointPath)) { - // savepointPath = savepointService.getSavePointPath(appParam); - } - - if (StringUtils.isNotBlank(savepointPath)) { - final URI uri = URI.create(savepointPath); - final String scheme = uri.getScheme(); - final String pathPart = uri.getPath(); - String error = null; - 
if (scheme == null) { - error = "This state.savepoints.dir value " - + savepointPath - + " scheme (hdfs://, file://, etc) of is null. Please specify the file system scheme explicitly in the URI."; - } else if (pathPart == null) { - error = "This state.savepoints.dir value " - + savepointPath - + " path part to store the checkpoint data in is null. Please specify a directory path for the checkpoint data."; - } else if (pathPart.isEmpty() || "/".equals(pathPart)) { - error = "This state.savepoints.dir value " - + savepointPath - + " Cannot use the root directory for checkpoints."; - } - return error; - } else { - return "When custom savepoint is not set, state.savepoints.dir needs to be set in properties or flink-conf.yaml of application"; - } - } - private Boolean checkJobName(String jobName) { if (!StringUtils.isBlank(jobName.trim())) { return JOB_NAME_PATTERN.matcher(jobName).matches() diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationManageServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationManageServiceImpl.java index 28c4bfc0b6..eaceffc456 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationManageServiceImpl.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/application/impl/SparkApplicationManageServiceImpl.java @@ -27,31 +27,30 @@ import org.apache.streampark.console.base.mybatis.pager.MybatisPager; import org.apache.streampark.console.base.util.WebUtils; import org.apache.streampark.console.core.bean.AppControl; -import org.apache.streampark.console.core.entity.ApplicationConfig; -import org.apache.streampark.console.core.entity.FlinkSql; import org.apache.streampark.console.core.entity.Resource; import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; +import org.apache.streampark.console.core.entity.SparkSql; import org.apache.streampark.console.core.enums.CandidateTypeEnum; import org.apache.streampark.console.core.enums.ChangeTypeEnum; -import org.apache.streampark.console.core.enums.FlinkAppStateEnum; import org.apache.streampark.console.core.enums.OptionStateEnum; import org.apache.streampark.console.core.enums.ReleaseStateEnum; +import org.apache.streampark.console.core.enums.SparkAppStateEnum; import org.apache.streampark.console.core.mapper.SparkApplicationMapper; import org.apache.streampark.console.core.service.AppBuildPipeService; -import org.apache.streampark.console.core.service.ApplicationBackUpService; -import org.apache.streampark.console.core.service.ApplicationConfigService; -import org.apache.streampark.console.core.service.ApplicationLogService; -import org.apache.streampark.console.core.service.EffectiveService; -import org.apache.streampark.console.core.service.FlinkSqlService; import org.apache.streampark.console.core.service.ProjectService; import org.apache.streampark.console.core.service.ResourceService; import org.apache.streampark.console.core.service.SettingService; +import org.apache.streampark.console.core.service.SparkApplicationBackUpService; +import org.apache.streampark.console.core.service.SparkApplicationConfigService; +import org.apache.streampark.console.core.service.SparkApplicationLogService; +import 
org.apache.streampark.console.core.service.SparkEffectiveService; +import org.apache.streampark.console.core.service.SparkSqlService; import org.apache.streampark.console.core.service.YarnQueueService; import org.apache.streampark.console.core.service.application.SparkApplicationManageService; import org.apache.streampark.console.core.util.ServiceHelper; import org.apache.streampark.flink.packer.pipeline.PipelineStatusEnum; -import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; @@ -73,7 +72,6 @@ import java.io.File; import java.io.IOException; -import java.util.Arrays; import java.util.Collection; import java.util.Date; import java.util.List; @@ -97,19 +95,19 @@ public class SparkApplicationManageServiceImpl private ProjectService projectService; @Autowired - private ApplicationBackUpService backUpService; + private SparkApplicationBackUpService backUpService; @Autowired - private ApplicationConfigService configService; + private SparkApplicationConfigService configService; @Autowired - private ApplicationLogService applicationLogService; + private SparkApplicationLogService applicationLogService; @Autowired - private FlinkSqlService flinkSqlService; + private SparkSqlService sparkSqlService; @Autowired - private EffectiveService effectiveService; + private SparkEffectiveService effectiveService; @Autowired private SettingService settingService; @@ -130,17 +128,17 @@ public void resetOptionState() { @Override public void toEffective(SparkApplication appParam) { + SparkApplicationConfig config = configService.getLatest(appParam.getId()); // set latest to Effective - ApplicationConfig config = configService.getLatest(appParam.getId()); if (config != null) { this.configService.toEffective(appParam.getId(), config.getId()); } if (appParam.isSparkSqlJob()) { - FlinkSql flinkSql = flinkSqlService.getCandidate(appParam.getId(), null); - if (flinkSql != null) { - flinkSqlService.toEffective(appParam.getId(), flinkSql.getId()); + SparkSql sparkSql = sparkSqlService.getCandidate(appParam.getId(), null); + if (sparkSql != null) { + sparkSqlService.toEffective(appParam.getId(), sparkSql.getId()); // clean candidate - flinkSqlService.cleanCandidate(flinkSql.getId()); + sparkSqlService.cleanCandidate(sparkSql.getId()); } } } @@ -163,7 +161,7 @@ public Boolean remove(Long appId) { SparkApplication application = getById(appId); // 1) remove flink sql - flinkSqlService.removeByAppId(application.getId()); + sparkSqlService.removeByAppId(application.getId()); // 2) remove log applicationLogService.removeByAppId(application.getId()); @@ -212,19 +210,8 @@ public IPage page(SparkApplication appParam, RestRequest reque return null; } Page page = MybatisPager.getPage(request); - - if (ArrayUtils.isNotEmpty(appParam.getStateArray()) - && Arrays.stream(appParam.getStateArray()) - .anyMatch(x -> x == FlinkAppStateEnum.FINISHED.getValue())) { - Integer[] newArray = ArrayUtils.insert( - appParam.getStateArray().length, - appParam.getStateArray(), - FlinkAppStateEnum.POS_TERMINATED.getValue()); - appParam.setStateArray(newArray); - } this.baseMapper.selectPage(page, appParam); List records = page.getRecords(); - long now = System.currentTimeMillis(); List appIds = records.stream().map(SparkApplication::getId).collect(Collectors.toList()); Map pipeStates = appBuildPipeService.listAppIdPipelineStatusMap(appIds); @@ -268,18 +255,20 @@ public void changeOwnership(Long userId, Long targetUserId) { public boolean 
create(SparkApplication appParam) { ApiAlertException.throwIfNull( appParam.getTeamId(), "The teamId can't be null. Create application failed."); + appParam.setUserId(ServiceHelper.getUserId()); - appParam.setState(FlinkAppStateEnum.ADDED.getValue()); + appParam.setState(SparkAppStateEnum.ADDED.getValue()); appParam.setRelease(ReleaseStateEnum.NEED_RELEASE.get()); appParam.setOptionState(OptionStateEnum.NONE.getValue()); - appParam.setDefaultModeIngress(settingService.getIngressModeDefault()); + appParam.setCreateTime(new Date()); + appParam.setModifyTime(appParam.getCreateTime()); boolean success = validateQueueIfNeeded(appParam); ApiAlertException.throwIfFalse( success, String.format(ERROR_APP_QUEUE_HINT, appParam.getYarnQueue(), appParam.getTeamId())); + appParam.resolveYarnQueue(); - appParam.doSetHotParams(); if (appParam.isUploadJob()) { String jarPath = String.format( "%s/%d/%s", Workspace.local().APP_UPLOADS(), appParam.getTeamId(), appParam.getJar()); @@ -294,11 +283,11 @@ public boolean create(SparkApplication appParam) { if (save(appParam)) { if (appParam.isSparkSqlJob()) { - FlinkSql flinkSql = new FlinkSql(appParam); - flinkSqlService.create(flinkSql); + SparkSql sparkSql = new SparkSql(appParam); + sparkSqlService.create(sparkSql); } if (appParam.getConfig() != null) { - // configService.create(appParam, true); + configService.create(appParam, true); } return true; } else { @@ -306,78 +295,71 @@ public boolean create(SparkApplication appParam) { } } - private boolean existsByJobName(String jobName) { + private boolean existsByAppName(String jobName) { return baseMapper.exists( - new LambdaQueryWrapper().eq(SparkApplication::getJobName, jobName)); + new LambdaQueryWrapper().eq(SparkApplication::getAppName, jobName)); } @SuppressWarnings("checkstyle:WhitespaceAround") @Override @SneakyThrows public Long copy(SparkApplication appParam) { - boolean existsByJobName = this.existsByJobName(appParam.getJobName()); + boolean existsByAppName = this.existsByAppName(appParam.getAppName()); ApiAlertException.throwIfFalse( - !existsByJobName, + !existsByAppName, "[StreamPark] Application names can't be repeated, copy application failed."); SparkApplication oldApp = getById(appParam.getId()); SparkApplication newApp = new SparkApplication(); - String jobName = appParam.getJobName(); - - newApp.setJobName(jobName); - newApp.setClusterId(jobName); - newApp.setArgs(appParam.getArgs() != null ? 
appParam.getArgs() : oldApp.getArgs()); - newApp.setVersionId(oldApp.getVersionId()); - newApp.setSparkClusterId(oldApp.getSparkClusterId()); - newApp.setRestartSize(oldApp.getRestartSize()); + newApp.setTeamId(oldApp.getTeamId()); newApp.setJobType(oldApp.getJobType()); - newApp.setOptions(oldApp.getOptions()); - newApp.setDynamicProperties(oldApp.getDynamicProperties()); - newApp.setResolveOrder(oldApp.getResolveOrder()); - newApp.setExecutionMode(oldApp.getExecutionMode()); - newApp.setSparkImage(oldApp.getSparkImage()); - newApp.setK8sNamespace(oldApp.getK8sNamespace()); - newApp.setK8sRestExposedType(oldApp.getK8sRestExposedType()); - newApp.setK8sPodTemplate(oldApp.getK8sPodTemplate()); - newApp.setK8sJmPodTemplate(oldApp.getK8sJmPodTemplate()); - newApp.setK8sTmPodTemplate(oldApp.getK8sTmPodTemplate()); - newApp.setK8sHadoopIntegration(oldApp.getK8sHadoopIntegration()); - newApp.setDescription(oldApp.getDescription()); - newApp.setAlertId(oldApp.getAlertId()); - newApp.setCpFailureAction(oldApp.getCpFailureAction()); - newApp.setCpFailureRateInterval(oldApp.getCpFailureRateInterval()); - newApp.setCpMaxFailureInterval(oldApp.getCpMaxFailureInterval()); - newApp.setMainClass(oldApp.getMainClass()); newApp.setAppType(oldApp.getAppType()); + newApp.setVersionId(oldApp.getVersionId()); + newApp.setAppName(appParam.getAppName()); + newApp.setExecutionMode(oldApp.getExecutionMode()); newApp.setResourceFrom(oldApp.getResourceFrom()); newApp.setProjectId(oldApp.getProjectId()); newApp.setModule(oldApp.getModule()); - newApp.setUserId(ServiceHelper.getUserId()); - newApp.setState(FlinkAppStateEnum.ADDED.getValue()); - newApp.setRelease(ReleaseStateEnum.NEED_RELEASE.get()); - newApp.setOptionState(OptionStateEnum.NONE.getValue()); - newApp.setHotParams(oldApp.getHotParams()); - + newApp.setMainClass(oldApp.getMainClass()); newApp.setJar(oldApp.getJar()); newApp.setJarCheckSum(oldApp.getJarCheckSum()); - newApp.setTags(oldApp.getTags()); - newApp.setTeamId(oldApp.getTeamId()); + newApp.setAppProperties(oldApp.getAppProperties()); + newApp.setAppArgs(appParam.getAppArgs() != null ? 
appParam.getAppArgs() : oldApp.getAppArgs()); + newApp.setYarnQueue(oldApp.getYarnQueue()); + newApp.resolveYarnQueue(); + newApp.setK8sMasterUrl(oldApp.getK8sMasterUrl()); + newApp.setK8sContainerImage(oldApp.getK8sContainerImage()); + newApp.setK8sImagePullPolicy(oldApp.getK8sImagePullPolicy()); + newApp.setK8sServiceAccount(oldApp.getK8sServiceAccount()); + newApp.setK8sNamespace(oldApp.getK8sNamespace()); + newApp.setHadoopUser(oldApp.getHadoopUser()); + newApp.setRestartSize(oldApp.getRestartSize()); + newApp.setState(SparkAppStateEnum.ADDED.getValue()); + newApp.setOptions(oldApp.getOptions()); + newApp.setOptionState(OptionStateEnum.NONE.getValue()); + newApp.setUserId(ServiceHelper.getUserId()); + newApp.setDescription(oldApp.getDescription()); + newApp.setRelease(ReleaseStateEnum.NEED_RELEASE.get()); + newApp.setAlertId(oldApp.getAlertId()); + newApp.setCreateTime(new Date()); + newApp.setModifyTime(newApp.getCreateTime()); + newApp.setTags(oldApp.getTags()); boolean saved = save(newApp); if (saved) { if (newApp.isSparkSqlJob()) { - FlinkSql copyFlinkSql = flinkSqlService.getLatestFlinkSql(appParam.getId(), true); - newApp.setSparkSql(copyFlinkSql.getSql()); - newApp.setTeamResource(copyFlinkSql.getTeamResource()); - newApp.setDependency(copyFlinkSql.getDependency()); - FlinkSql flinkSql = new FlinkSql(newApp); - flinkSqlService.create(flinkSql); + SparkSql copySparkSql = sparkSqlService.getLatestSparkSql(appParam.getId(), true); + newApp.setSparkSql(copySparkSql.getSql()); + newApp.setTeamResource(copySparkSql.getTeamResource()); + newApp.setDependency(copySparkSql.getDependency()); + SparkSql sparkSql = new SparkSql(newApp); + sparkSqlService.create(sparkSql); } - ApplicationConfig copyConfig = configService.getEffective(appParam.getId()); + SparkApplicationConfig copyConfig = configService.getEffective(appParam.getId()); if (copyConfig != null) { - ApplicationConfig config = new ApplicationConfig(); + SparkApplicationConfig config = new SparkApplicationConfig(); config.setAppId(newApp.getId()); config.setFormat(copyConfig.getFormat()); config.setContent(copyConfig.getContent()); @@ -389,7 +371,7 @@ public Long copy(SparkApplication appParam) { return newApp.getId(); } else { throw new ApiAlertException( - "create application from copy failed, copy source app: " + oldApp.getJobName()); + "create application from copy failed, copy source app: " + oldApp.getAppName()); } } @@ -440,30 +422,26 @@ public boolean update(SparkApplication appParam) { appParam.setJobType(application.getJobType()); // changes to the following parameters need to be re-release to take effect - application.setJobName(appParam.getJobName()); application.setVersionId(appParam.getVersionId()); - application.setArgs(appParam.getArgs()); - application.setOptions(appParam.getOptions()); - application.setDynamicProperties(appParam.getDynamicProperties()); - application.setResolveOrder(appParam.getResolveOrder()); + application.setAppName(appParam.getAppName()); application.setExecutionMode(appParam.getExecutionMode()); - application.setClusterId(appParam.getClusterId()); - application.setSparkImage(appParam.getSparkImage()); + application.setAppProperties(appParam.getAppProperties()); + application.setAppArgs(appParam.getAppArgs()); + application.setOptions(appParam.getOptions()); + + application.setYarnQueue(appParam.getYarnQueue()); + application.resolveYarnQueue(); + + application.setK8sMasterUrl(appParam.getK8sMasterUrl()); + application.setK8sContainerImage(appParam.getK8sContainerImage()); + 
application.setK8sImagePullPolicy(appParam.getK8sImagePullPolicy()); + application.setK8sServiceAccount(appParam.getK8sServiceAccount()); application.setK8sNamespace(appParam.getK8sNamespace()); - application.updateHotParams(appParam); - application.setK8sRestExposedType(appParam.getK8sRestExposedType()); - application.setK8sPodTemplate(appParam.getK8sPodTemplate()); - application.setK8sJmPodTemplate(appParam.getK8sJmPodTemplate()); - application.setK8sTmPodTemplate(appParam.getK8sTmPodTemplate()); - application.setK8sHadoopIntegration(appParam.getK8sHadoopIntegration()); // changes to the following parameters do not affect running tasks application.setDescription(appParam.getDescription()); application.setAlertId(appParam.getAlertId()); application.setRestartSize(appParam.getRestartSize()); - application.setCpFailureAction(appParam.getCpFailureAction()); - application.setCpFailureRateInterval(appParam.getCpFailureRateInterval()); - application.setCpMaxFailureInterval(appParam.getCpMaxFailureInterval()); application.setTags(appParam.getTags()); switch (appParam.getSparkExecutionMode()) { @@ -471,16 +449,13 @@ public boolean update(SparkApplication appParam) { case YARN_CLIENT: application.setHadoopUser(appParam.getHadoopUser()); break; - case REMOTE: - application.setSparkClusterId(appParam.getSparkClusterId()); - break; default: break; } - // Flink Sql job... + // Spark Sql job... if (application.isSparkSqlJob()) { - updateFlinkSqlJob(application, appParam); + updateSparkSqlJob(application, appParam); return true; } @@ -503,59 +478,59 @@ public boolean update(SparkApplication appParam) { * @param application * @param appParam */ - private void updateFlinkSqlJob(SparkApplication application, SparkApplication appParam) { - FlinkSql effectiveFlinkSql = flinkSqlService.getEffective(application.getId(), true); - if (effectiveFlinkSql == null) { - effectiveFlinkSql = flinkSqlService.getCandidate(application.getId(), CandidateTypeEnum.NEW); - flinkSqlService.removeById(effectiveFlinkSql.getId()); - FlinkSql sql = new FlinkSql(appParam); - flinkSqlService.create(sql); + private void updateSparkSqlJob(SparkApplication application, SparkApplication appParam) { + SparkSql effectiveSparkSql = sparkSqlService.getEffective(application.getId(), true); + if (effectiveSparkSql == null) { + effectiveSparkSql = sparkSqlService.getCandidate(application.getId(), CandidateTypeEnum.NEW); + sparkSqlService.removeById(effectiveSparkSql.getId()); + SparkSql sql = new SparkSql(appParam); + sparkSqlService.create(sql); application.setBuild(true); } else { // get previous flink sql and decode - FlinkSql copySourceFlinkSql = flinkSqlService.getById(appParam.getSqlId()); + SparkSql copySourceSparkSql = sparkSqlService.getById(appParam.getSqlId()); ApiAlertException.throwIfNull( - copySourceFlinkSql, "Flink sql is null, update flink sql job failed."); + copySourceSparkSql, "Spark sql is null, update spark sql job failed."); - copySourceFlinkSql.decode(); + copySourceSparkSql.decode(); // get submit flink sql - FlinkSql targetFlinkSql = new FlinkSql(appParam); + SparkSql targetSparkSql = new SparkSql(appParam); // judge sql and dependency has changed - ChangeTypeEnum changeTypeEnum = copySourceFlinkSql.checkChange(targetFlinkSql); + ChangeTypeEnum changeTypeEnum = copySourceSparkSql.checkChange(targetSparkSql); log.info("updateFlinkSqlJob changeTypeEnum: {}", changeTypeEnum); // if has been changed if (changeTypeEnum.hasChanged()) { // check if there is a candidate version for the newly added record - FlinkSql
newFlinkSql = flinkSqlService.getCandidate(application.getId(), CandidateTypeEnum.NEW); + SparkSql newSparkSql = sparkSqlService.getCandidate(application.getId(), CandidateTypeEnum.NEW); // If the candidate version of the new record exists, it will be deleted directly, // and only one candidate version will be retained. If the new candidate version is not // effective, // if it is edited again and the next record comes in, the previous candidate version will // be deleted. - if (newFlinkSql != null) { + if (newSparkSql != null) { // delete all records about candidates - flinkSqlService.removeById(newFlinkSql.getId()); + sparkSqlService.removeById(newSparkSql.getId()); } - FlinkSql historyFlinkSql = flinkSqlService.getCandidate(application.getId(), CandidateTypeEnum.HISTORY); + SparkSql historySparkSql = sparkSqlService.getCandidate(application.getId(), CandidateTypeEnum.HISTORY); // remove candidate flags that already exist but are set as candidates - if (historyFlinkSql != null) { - flinkSqlService.cleanCandidate(historyFlinkSql.getId()); + if (historySparkSql != null) { + sparkSqlService.cleanCandidate(historySparkSql.getId()); } - FlinkSql sql = new FlinkSql(appParam); - flinkSqlService.create(sql); + SparkSql sql = new SparkSql(appParam); + sparkSqlService.create(sql); if (changeTypeEnum.isDependencyChanged()) { application.setBuild(true); } } else { // judge version has changed - boolean versionChanged = !effectiveFlinkSql.getId().equals(appParam.getSqlId()); + boolean versionChanged = !effectiveSparkSql.getId().equals(appParam.getSqlId()); if (versionChanged) { // sql and dependency not changed, but version changed, means that rollback to the version CandidateTypeEnum type = CandidateTypeEnum.HISTORY; - flinkSqlService.setCandidate(type, appParam.getId(), appParam.getSqlId()); + sparkSqlService.setCandidate(type, appParam.getId(), appParam.getSqlId()); application.setRelease(ReleaseStateEnum.NEED_ROLLBACK.get()); application.setBuild(true); } @@ -619,8 +594,8 @@ public boolean checkBuildAndUpdate(SparkApplication appParam) { // If the current task is not running, or the task has just been added, // directly set the candidate version to the official version - FlinkSql flinkSql = flinkSqlService.getEffective(appParam.getId(), false); - if (!appParam.isRunning() || flinkSql == null) { + SparkSql sparkSql = sparkSqlService.getEffective(appParam.getId(), false); + if (!appParam.isRunning() || sparkSql == null) { this.toEffective(appParam); } } @@ -636,18 +611,18 @@ public void clean(SparkApplication appParam) { @Override public SparkApplication getApp(Long id) { SparkApplication application = this.baseMapper.selectApp(id); - ApplicationConfig config = configService.getEffective(id); + SparkApplicationConfig config = configService.getEffective(id); config = config == null ? 
configService.getLatest(id) : config; if (config != null) { config.setToApplication(application); } if (application.isSparkSqlJob()) { - FlinkSql flinkSql = flinkSqlService.getEffective(application.getId(), true); - if (flinkSql == null) { - flinkSql = flinkSqlService.getCandidate(application.getId(), CandidateTypeEnum.NEW); - flinkSql.setSql(DeflaterUtils.unzipString(flinkSql.getSql())); + SparkSql sparkSql = sparkSqlService.getEffective(application.getId(), true); + if (sparkSql == null) { + sparkSql = sparkSqlService.getCandidate(application.getId(), CandidateTypeEnum.NEW); + sparkSql.setSql(DeflaterUtils.unzipString(sparkSql.getSql())); } - flinkSql.setToApplication(application); + sparkSql.setToApplication(application); } else { if (application.isCICDJob()) { String path = this.projectService.getAppConfPath(application.getProjectId(), application.getModule()); @@ -655,7 +630,7 @@ public SparkApplication getApp(Long id) { } } - application.setYarnQueueByHotParams(); + application.resolveYarnQueue(); return application; } @@ -689,7 +664,7 @@ public boolean validateQueueIfNeeded(SparkApplication oldApp, SparkApplication n return true; } - oldApp.setYarnQueueByHotParams(); + oldApp.resolveYarnQueue(); if (SparkExecutionMode.isYarnMode(newApp.getSparkExecutionMode()) && StringUtils.equals(oldApp.getYarnQueue(), newApp.getYarnQueue())) { return true; diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkAppBuildPipeServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkAppBuildPipeServiceImpl.java index b9dd40e1ca..1a67704fb3 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkAppBuildPipeServiceImpl.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkAppBuildPipeServiceImpl.java @@ -20,7 +20,7 @@ import org.apache.streampark.common.Constant; import org.apache.streampark.common.conf.Workspace; import org.apache.streampark.common.enums.ApplicationType; -import org.apache.streampark.common.enums.FlinkDevelopmentMode; +import org.apache.streampark.common.enums.SparkDevelopmentMode; import org.apache.streampark.common.enums.SparkExecutionMode; import org.apache.streampark.common.fs.FsOperator; import org.apache.streampark.common.util.AssertUtils; @@ -31,26 +31,26 @@ import org.apache.streampark.console.base.util.WebUtils; import org.apache.streampark.console.core.bean.Dependency; import org.apache.streampark.console.core.entity.AppBuildPipeline; -import org.apache.streampark.console.core.entity.ApplicationConfig; -import org.apache.streampark.console.core.entity.FlinkSql; import org.apache.streampark.console.core.entity.Message; import org.apache.streampark.console.core.entity.Resource; import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; import org.apache.streampark.console.core.entity.SparkApplicationLog; import org.apache.streampark.console.core.entity.SparkEnv; +import org.apache.streampark.console.core.entity.SparkSql; import org.apache.streampark.console.core.enums.CandidateTypeEnum; import org.apache.streampark.console.core.enums.NoticeTypeEnum; import org.apache.streampark.console.core.enums.OptionStateEnum; import org.apache.streampark.console.core.enums.ReleaseStateEnum; import 
org.apache.streampark.console.core.enums.ResourceTypeEnum; import org.apache.streampark.console.core.mapper.ApplicationBuildPipelineMapper; -import org.apache.streampark.console.core.service.ApplicationConfigService; -import org.apache.streampark.console.core.service.FlinkSqlService; import org.apache.streampark.console.core.service.MessageService; import org.apache.streampark.console.core.service.ResourceService; import org.apache.streampark.console.core.service.SparkAppBuildPipeService; +import org.apache.streampark.console.core.service.SparkApplicationConfigService; import org.apache.streampark.console.core.service.SparkApplicationLogService; import org.apache.streampark.console.core.service.SparkEnvService; +import org.apache.streampark.console.core.service.SparkSqlService; import org.apache.streampark.console.core.service.application.SparkApplicationInfoService; import org.apache.streampark.console.core.service.application.SparkApplicationManageService; import org.apache.streampark.console.core.util.ServiceHelper; @@ -106,7 +106,7 @@ public class SparkAppBuildPipeServiceImpl private SparkEnvService sparkEnvService; @Autowired - private FlinkSqlService flinkSqlService; + private SparkSqlService sparkSqlService; @Autowired private MessageService messageService; @@ -124,7 +124,7 @@ public class SparkAppBuildPipeServiceImpl private SparkAppHttpWatcher sparkAppHttpWatcher; @Autowired - private ApplicationConfigService applicationConfigService; + private SparkApplicationConfigService applicationConfigService; @Autowired private ResourceService resourceService; @@ -163,13 +163,13 @@ public boolean buildApplication(@Nonnull Long appId, boolean forceBuild) { } // 1) spark sql setDependency - FlinkSql newFlinkSql = flinkSqlService.getCandidate(app.getId(), CandidateTypeEnum.NEW); - FlinkSql effectiveFlinkSql = flinkSqlService.getEffective(app.getId(), false); + SparkSql newSparkSql = sparkSqlService.getCandidate(app.getId(), CandidateTypeEnum.NEW); + SparkSql effectiveSparkSql = sparkSqlService.getEffective(app.getId(), false); if (app.isSparkSqlJob()) { - FlinkSql flinkSql = newFlinkSql == null ? effectiveFlinkSql : newFlinkSql; - AssertUtils.notNull(flinkSql); - app.setDependency(flinkSql.getDependency()); - app.setTeamResource(flinkSql.getTeamResource()); + SparkSql sparkSql = newSparkSql == null ? 
effectiveSparkSql : newSparkSql; + AssertUtils.notNull(sparkSql); + app.setDependency(sparkSql.getDependency()); + app.setTeamResource(sparkSql.getTeamResource()); } // create pipeline instance @@ -286,11 +286,11 @@ public void onFinish(PipelineSnapshot snapshot, BuildResult result) { // If the current task is not running, or the task has just been added, directly // set // the candidate version to the official version - if (app.isSparkSqlJob()) { + if (app.isCustomCodeOrSparkSqlJob()) { applicationManageService.toEffective(app); } else { if (app.isStreamParkJob()) { - ApplicationConfig config = + SparkApplicationConfig config = applicationConfigService.getLatest(app.getId()); if (config != null) { config.setToApplication(app); @@ -307,7 +307,7 @@ public void onFinish(PipelineSnapshot snapshot, BuildResult result) { Message message = new Message( ServiceHelper.getUserId(), app.getId(), - app.getJobName().concat(" release failed"), + app.getAppName().concat(" release failed"), ExceptionUtils.stringifyException(snapshot.error().exception()), NoticeTypeEnum.EXCEPTION); messageService.push(message); @@ -342,16 +342,16 @@ public void onFinish(PipelineSnapshot snapshot, BuildResult result) { private void checkBuildEnv(Long appId, boolean forceBuild) { SparkApplication app = applicationManageService.getById(appId); - // 1) check flink version + // 1) check spark version SparkEnv env = sparkEnvService.getById(app.getVersionId()); boolean checkVersion = env.getSparkVersion().checkVersion(false); ApiAlertException.throwIfFalse( - checkVersion, "Unsupported flink version:" + env.getSparkVersion().version()); + checkVersion, "Unsupported spark version:" + env.getSparkVersion().version()); // 2) check env boolean envOk = applicationInfoService.checkEnv(app); ApiAlertException.throwIfFalse( - envOk, "Check flink env failed, please check the flink version of this job"); + envOk, "Check spark env failed, please check the spark version of this job"); // 3) Whether the application can currently start a new building progress ApiAlertException.throwIfTrue( @@ -378,13 +378,13 @@ private BuildPipeline createPipelineInstance(@Nonnull SparkApplication app) { case YARN_CLIENT: String yarnProvidedPath = app.getAppLib(); String localWorkspace = app.getLocalAppHome().concat("/lib"); - if (FlinkDevelopmentMode.CUSTOM_CODE == app.getDevelopmentMode() - && ApplicationType.APACHE_FLINK == app.getApplicationType()) { + if (SparkDevelopmentMode.CUSTOM_CODE == app.getDevelopmentMode() + && ApplicationType.APACHE_SPARK == app.getApplicationType()) { yarnProvidedPath = app.getAppHome(); localWorkspace = app.getLocalAppHome(); } SparkYarnApplicationBuildRequest yarnAppRequest = new SparkYarnApplicationBuildRequest( - app.getJobName(), + app.getAppName(), mainClass, localWorkspace, yarnProvidedPath, @@ -412,10 +412,12 @@ private String retrieveSparkUserJar(SparkEnv sparkEnv, SparkApplication app) { "[StreamPark] unsupported ApplicationType of custom code: " + app.getApplicationType()); } - case PYFLINK: + case PYSPARK: return String.format("%s/%s", app.getAppHome(), app.getJar()); - case FLINK_SQL: + + case SPARK_SQL: String sqlDistJar = ServiceHelper.getSparkSqlClientJar(sparkEnv); + if (app.getSparkExecutionMode() == SparkExecutionMode.YARN_CLUSTER) { String clientPath = Workspace.remote().APP_CLIENT(); return String.format("%s/%s", clientPath, sqlDistJar); diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkApplicationBackUpServiceImpl.java 
b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkApplicationBackUpServiceImpl.java new file mode 100644 index 0000000000..df094eb920 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkApplicationBackUpServiceImpl.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.service.impl; + +import org.apache.streampark.common.fs.FsOperator; +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.base.exception.ApiAlertException; +import org.apache.streampark.console.base.exception.InternalException; +import org.apache.streampark.console.base.mybatis.pager.MybatisPager; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationBackUp; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; +import org.apache.streampark.console.core.entity.SparkSql; +import org.apache.streampark.console.core.enums.EffectiveTypeEnum; +import org.apache.streampark.console.core.enums.ReleaseStateEnum; +import org.apache.streampark.console.core.mapper.SparkApplicationBackUpMapper; +import org.apache.streampark.console.core.service.SparkApplicationBackUpService; +import org.apache.streampark.console.core.service.SparkApplicationConfigService; +import org.apache.streampark.console.core.service.SparkEffectiveService; +import org.apache.streampark.console.core.service.SparkSqlService; +import org.apache.streampark.console.core.service.application.SparkApplicationManageService; + +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper; +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Propagation; +import org.springframework.transaction.annotation.Transactional; + +@Slf4j +@Service +@Transactional(propagation = Propagation.SUPPORTS, readOnly = true, rollbackFor = Exception.class) +public class SparkApplicationBackUpServiceImpl + extends + ServiceImpl + implements + SparkApplicationBackUpService { + + @Autowired + private SparkApplicationManageService applicationManageService; + + @Autowired + private SparkApplicationConfigService configService; + + @Autowired + private SparkEffectiveService effectiveService; + + 
@Autowired + private SparkSqlService sparkSqlService; + + @Override + public IPage getPage(SparkApplicationBackUp bakParam, RestRequest request) { + Page page = MybatisPager.getPage(request); + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkApplicationBackUp::getAppId, bakParam.getAppId()); + return this.baseMapper.selectPage(page, queryWrapper); + } + + @Override + public void rollback(SparkApplicationBackUp bakParam) { + + SparkApplication application = applicationManageService.getById(bakParam.getAppId()); + + FsOperator fsOperator = application.getFsOperator(); + // the backup files do not exist, nothing to roll back + if (!fsOperator.exists(bakParam.getPath())) { + return; + } + + // the backup files exist, so the rollback can proceed. + // When rolling back, determine whether the currently effective project needs to be + // backed up first, and if so, perform the backup before restoring + if (bakParam.isBackup()) { + application.setBackUpDescription(bakParam.getDescription()); + if (application.isSparkSqlJob()) { + SparkSql sparkSql = sparkSqlService.getEffective(application.getId(), false); + backup(application, sparkSql); + } else { + backup(application, null); + } + } + + // restore config and sql + + // if the job is running, set the restored config as latest + if (application.isRunning()) { + // roll back to the backup config + configService.setLatestOrEffective(true, bakParam.getId(), bakParam.getAppId()); + } else { + effectiveService.saveOrUpdate( + bakParam.getAppId(), EffectiveTypeEnum.SPARKCONFIG, bakParam.getId()); + // for spark sql jobs, also roll back the sql and its dependencies + if (application.isSparkSqlJob()) { + effectiveService.saveOrUpdate( + bakParam.getAppId(), EffectiveTypeEnum.SPARKSQL, bakParam.getSqlId()); + } + } + + // delete the currently effective project files (Note: if the rollback fails, they need to + // be restored) + fsOperator.delete(application.getAppHome()); + + // copy the backup files back to the effective directory + fsOperator.copyDir(bakParam.getPath(), application.getAppHome()); + + // update restart status + applicationManageService.update( + new UpdateWrapper() + .lambda() + .eq(SparkApplication::getId, application.getId()) + .set(SparkApplication::getRelease, ReleaseStateEnum.NEED_RESTART.get())); + } + + @Override + public void revoke(SparkApplication appParam) { + Page page = new Page<>(); + page.setCurrent(0).setSize(1).setSearchCount(false); + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkApplicationBackUp::getAppId, appParam.getId()) + .orderByDesc(SparkApplicationBackUp::getCreateTime); + + Page backUpPages = baseMapper.selectPage(page, queryWrapper); + if (!backUpPages.getRecords().isEmpty()) { + SparkApplicationBackUp backup = backUpPages.getRecords().get(0); + String path = backup.getPath(); + appParam.getFsOperator().move(path, appParam.getWorkspace().APP_WORKSPACE()); + super.removeById(backup.getId()); + } + } + + @Override + public void remove(SparkApplication appParam) { + try { + baseMapper.delete( + new LambdaQueryWrapper() + .eq(SparkApplicationBackUp::getAppId, appParam.getId())); + appParam + .getFsOperator() + .delete( + appParam + .getWorkspace() + .APP_BACKUPS() + .concat("/") + .concat(appParam.getId().toString())); + } catch (Exception e) { + log.error(e.getMessage(), e); + } + } + + @Override + public void rollbackSparkSql(SparkApplication appParam, SparkSql sparkSqlParam) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkApplicationBackUp::getAppId, appParam.getId()) + .eq(SparkApplicationBackUp::getSqlId, sparkSqlParam.getId()); + SparkApplicationBackUp backUp =
baseMapper.selectOne(queryWrapper); + ApiAlertException.throwIfNull( + backUp, "Application backup can't be null. Rollback spark sql failed."); + // rollback config and sql + effectiveService.saveOrUpdate(backUp.getAppId(), EffectiveTypeEnum.SPARKCONFIG, backUp.getId()); + effectiveService.saveOrUpdate(backUp.getAppId(), EffectiveTypeEnum.SPARKSQL, backUp.getSqlId()); + } + + @Override + public Boolean removeById(Long id) throws InternalException { + SparkApplicationBackUp backUp = getById(id); + try { + SparkApplication application = applicationManageService.getById(backUp.getAppId()); + application.getFsOperator().delete(backUp.getPath()); + super.removeById(id); + return true; + } catch (Exception e) { + throw new InternalException(e.getMessage()); + } + } + + @Override + public void backup(SparkApplication appParam, SparkSql sparkSqlParam) { + // basic configuration file backup + String appHome = (appParam.isCustomCodeJob() && appParam.isCICDJob()) + ? appParam.getDistHome() + : appParam.getAppHome(); + FsOperator fsOperator = appParam.getFsOperator(); + if (fsOperator.exists(appHome)) { + // move files to back up directory + SparkApplicationConfig config = configService.getEffective(appParam.getId()); + if (config != null) { + appParam.setConfigId(config.getId()); + } + // spark sql tasks need to back up sql and dependencies + int version = 1; + if (sparkSqlParam != null) { + appParam.setSqlId(sparkSqlParam.getId()); + version = sparkSqlParam.getVersion(); + } else if (config != null) { + version = config.getVersion(); + } + + SparkApplicationBackUp applicationBackUp = new SparkApplicationBackUp(appParam); + applicationBackUp.setVersion(version); + + this.save(applicationBackUp); + fsOperator.mkdirs(applicationBackUp.getPath()); + fsOperator.copyDir(appHome, applicationBackUp.getPath()); + } + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkApplicationConfigServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkApplicationConfigServiceImpl.java new file mode 100644 index 0000000000..c8dd9bd8cd --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkApplicationConfigServiceImpl.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.console.core.service.impl; + +import org.apache.streampark.common.util.DeflaterUtils; +import org.apache.streampark.common.util.Utils; +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.base.exception.ApiAlertException; +import org.apache.streampark.console.base.mybatis.pager.MybatisPager; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkApplicationConfig; +import org.apache.streampark.console.core.enums.ConfigFileTypeEnum; +import org.apache.streampark.console.core.enums.EffectiveTypeEnum; +import org.apache.streampark.console.core.mapper.SparkApplicationConfigMapper; +import org.apache.streampark.console.core.service.SparkApplicationConfigService; +import org.apache.streampark.console.core.service.SparkEffectiveService; + +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.core.toolkit.Wrappers; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.io.Resource; +import org.springframework.core.io.ResourceLoader; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Propagation; +import org.springframework.transaction.annotation.Transactional; + +import java.util.Base64; +import java.util.Date; +import java.util.List; +import java.util.Scanner; + +@Slf4j +@Service +@Transactional(propagation = Propagation.SUPPORTS, readOnly = true, rollbackFor = Exception.class) +public class SparkApplicationConfigServiceImpl + extends + ServiceImpl + implements + SparkApplicationConfigService { + + private String sparkConfTemplate = null; + + @Autowired + private ResourceLoader resourceLoader; + + @Autowired + private SparkEffectiveService effectiveService; + + @Override + public synchronized void create(SparkApplication appParam, Boolean latest) { + String decode = new String(Base64.getDecoder().decode(appParam.getConfig())); + String config = DeflaterUtils.zipString(decode.trim()); + + SparkApplicationConfig sparkApplicationConfig = new SparkApplicationConfig(); + sparkApplicationConfig.setAppId(appParam.getId()); + + if (appParam.getFormat() != null) { + ConfigFileTypeEnum fileType = ConfigFileTypeEnum.of(appParam.getFormat()); + ApiAlertException.throwIfTrue( + fileType == null || ConfigFileTypeEnum.UNKNOWN == fileType, + "spark application config error. must be (.properties|.yaml|.yml|.conf)"); + + sparkApplicationConfig.setFormat(fileType.getValue()); + } + + sparkApplicationConfig.setContent(config); + sparkApplicationConfig.setCreateTime(new Date()); + Integer version = this.baseMapper.selectLastVersion(appParam.getId()); + sparkApplicationConfig.setVersion(version == null ? 
1 : version + 1); + save(sparkApplicationConfig); + this.setLatestOrEffective(latest, sparkApplicationConfig.getId(), appParam.getId()); + } + + public void setLatest(Long appId, Long configId) { + LambdaUpdateWrapper updateWrapper = Wrappers.lambdaUpdate(); + updateWrapper.set(SparkApplicationConfig::getLatest, false).eq(SparkApplicationConfig::getAppId, appId); + this.update(updateWrapper); + + updateWrapper.clear(); + updateWrapper.set(SparkApplicationConfig::getLatest, true).eq(SparkApplicationConfig::getId, configId); + this.update(updateWrapper); + } + + @Override + public synchronized void update(SparkApplication appParam, Boolean latest) { + // spark sql job + SparkApplicationConfig latestConfig = getLatest(appParam.getId()); + if (appParam.isSparkSqlJob()) { + updateForSparkSqlJob(appParam, latest, latestConfig); + } else { + updateForNonSparkSqlJob(appParam, latest, latestConfig); + } + } + + private void updateForNonSparkSqlJob(SparkApplication appParam, Boolean latest, + SparkApplicationConfig latestConfig) { + // may be re-selected a config file (without config id), or may be based on an original edit + // (with config Id). + Long configId = appParam.getConfigId(); + // an original edit + if (configId != null) { + SparkApplicationConfig config = this.getById(configId); + String decode = new String(Base64.getDecoder().decode(appParam.getConfig())); + String encode = DeflaterUtils.zipString(decode.trim()); + // create... + if (!config.getContent().equals(encode)) { + if (latestConfig != null) { + removeById(latestConfig.getId()); + } + this.create(appParam, latest); + } else { + this.setLatestOrEffective(latest, configId, appParam.getId()); + } + } else { + SparkApplicationConfig config = getEffective(appParam.getId()); + if (config != null) { + String decode = new String(Base64.getDecoder().decode(appParam.getConfig())); + String encode = DeflaterUtils.zipString(decode.trim()); + // create... 
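+ // only persist a new config version when the newly submitted content differs from the current effective config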
+ if (!config.getContent().equals(encode)) { + this.create(appParam, latest); + } + } else { + this.create(appParam, latest); + } + } + } + + private void updateForSparkSqlJob( + SparkApplication appParam, Boolean latest, SparkApplicationConfig latestConfig) { + // get effective config + SparkApplicationConfig effectiveConfig = getEffective(appParam.getId()); + if (Utils.isEmpty(appParam.getConfig())) { + if (effectiveConfig != null) { + effectiveService.remove(appParam.getId(), EffectiveTypeEnum.SPARKCONFIG); + } + } else { + // there was no configuration before, create a new configuration + if (effectiveConfig == null) { + if (latestConfig != null) { + removeById(latestConfig.getId()); + } + this.create(appParam, latest); + } else { + String decode = new String(Base64.getDecoder().decode(appParam.getConfig())); + String encode = DeflaterUtils.zipString(decode.trim()); + // need to diff the two configs are consistent + if (!effectiveConfig.getContent().equals(encode)) { + if (latestConfig != null) { + removeById(latestConfig.getId()); + } + this.create(appParam, latest); + } + } + } + } + + /** Set not running tasks to effective and running tasks to Latest */ + @Override + public void setLatestOrEffective(Boolean latest, Long configId, Long appId) { + if (latest) { + this.setLatest(appId, configId); + } else { + this.toEffective(appId, configId); + } + } + + @Override + public void toEffective(Long appId, Long configId) { + LambdaUpdateWrapper updateWrapper = Wrappers.lambdaUpdate(); + updateWrapper.eq(SparkApplicationConfig::getAppId, appId).set(SparkApplicationConfig::getLatest, false); + this.update(updateWrapper); + effectiveService.saveOrUpdate(appId, EffectiveTypeEnum.SPARKCONFIG, configId); + } + + @Override + public SparkApplicationConfig getLatest(Long appId) { + return baseMapper.selectLatest(appId); + } + + @Override + public SparkApplicationConfig getEffective(Long appId) { + return baseMapper.selectEffective(appId); + } + + @Override + public SparkApplicationConfig get(Long id) { + SparkApplicationConfig config = getById(id); + if (config.getContent() != null) { + String unzipString = DeflaterUtils.unzipString(config.getContent()); + String encode = Base64.getEncoder().encodeToString(unzipString.getBytes()); + config.setContent(encode); + } + return config; + } + + @Override + public IPage getPage(SparkApplicationConfig config, RestRequest request) { + request.setSortField("version"); + Page page = MybatisPager.getPage(request); + IPage configList = this.baseMapper.selectPageByAppId(page, config.getAppId()); + fillEffectiveField(config.getAppId(), configList.getRecords()); + return configList; + } + + @Override + public List list(Long appId) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkApplicationConfig::getAppId, appId) + .orderByDesc(SparkApplicationConfig::getVersion); + + List configList = this.baseMapper.selectList(queryWrapper); + fillEffectiveField(appId, configList); + return configList; + } + + @Override + public synchronized String readTemplate() { + if (sparkConfTemplate == null) { + try { + Resource resource = resourceLoader.getResource("classpath:spark-application.conf"); + Scanner scanner = new Scanner(resource.getInputStream()); + StringBuilder stringBuffer = new StringBuilder(); + while (scanner.hasNextLine()) { + stringBuffer.append(scanner.nextLine()).append(System.lineSeparator()); + } + scanner.close(); + String template = stringBuffer.toString(); + this.sparkConfTemplate = 
Base64.getEncoder().encodeToString(template.getBytes()); + } catch (Exception e) { + log.error("Read conf/spark-application.conf failed, please check your deployment"); + log.error(e.getMessage(), e); + } + } + return this.sparkConfTemplate; + } + + @Override + public void removeByAppId(Long appId) { + baseMapper.delete( + new LambdaQueryWrapper().eq(SparkApplicationConfig::getAppId, appId)); + } + + private void fillEffectiveField(Long id, List configList) { + SparkApplicationConfig effective = getEffective(id); + + if (effective == null) { + return; + } + + configList.stream() + .filter(config -> config.getId().equals(effective.getId())) + .findFirst() + .ifPresent(config -> config.setEffective(true)); + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkEffectiveServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkEffectiveServiceImpl.java new file mode 100644 index 0000000000..d1369cce25 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkEffectiveServiceImpl.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.console.core.service.impl; + +import org.apache.streampark.console.core.entity.SparkEffective; +import org.apache.streampark.console.core.enums.EffectiveTypeEnum; +import org.apache.streampark.console.core.mapper.SparkEffectiveMapper; +import org.apache.streampark.console.core.service.SparkEffectiveService; + +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Propagation; +import org.springframework.transaction.annotation.Transactional; + +import java.util.Date; + +@Slf4j +@Service +@Transactional(propagation = Propagation.SUPPORTS, readOnly = true, rollbackFor = Exception.class) +public class SparkEffectiveServiceImpl extends ServiceImpl + implements + SparkEffectiveService { + + @Override + public void remove(Long appId, EffectiveTypeEnum effectiveTypeEnum) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkEffective::getAppId, appId) + .eq(SparkEffective::getTargetType, effectiveTypeEnum.getType()); + baseMapper.delete(queryWrapper); + } + + @Override + public SparkEffective get(Long appId, EffectiveTypeEnum effectiveTypeEnum) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkEffective::getAppId, appId) + .eq(SparkEffective::getTargetType, effectiveTypeEnum.getType()); + return this.getOne(queryWrapper); + } + + @Override + public void saveOrUpdate(Long appId, EffectiveTypeEnum type, Long id) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkEffective::getAppId, appId) + .eq(SparkEffective::getTargetType, type.getType()); + long count = count(queryWrapper); + if (count == 0) { + SparkEffective effective = new SparkEffective(); + effective.setAppId(appId); + effective.setTargetType(type.getType()); + effective.setTargetId(id); + effective.setCreateTime(new Date()); + save(effective); + } else { + update( + new LambdaUpdateWrapper() + .eq(SparkEffective::getAppId, appId) + .eq(SparkEffective::getTargetType, type.getType()) + .set(SparkEffective::getTargetId, id)); + } + } + + @Override + public void removeByAppId(Long appId) { + LambdaQueryWrapper queryWrapper = + new LambdaQueryWrapper().eq(SparkEffective::getAppId, appId); + this.remove(queryWrapper); + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkSqlServiceImpl.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkSqlServiceImpl.java new file mode 100644 index 0000000000..0468268800 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/service/impl/SparkSqlServiceImpl.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.console.core.service.impl; + +import org.apache.streampark.common.util.AssertUtils; +import org.apache.streampark.common.util.DeflaterUtils; +import org.apache.streampark.common.util.ExceptionUtils; +import org.apache.streampark.console.base.domain.RestRequest; +import org.apache.streampark.console.base.mybatis.pager.MybatisPager; +import org.apache.streampark.console.core.entity.SparkApplication; +import org.apache.streampark.console.core.entity.SparkEnv; +import org.apache.streampark.console.core.entity.SparkSql; +import org.apache.streampark.console.core.enums.CandidateTypeEnum; +import org.apache.streampark.console.core.enums.EffectiveTypeEnum; +import org.apache.streampark.console.core.mapper.SparkSqlMapper; +import org.apache.streampark.console.core.service.SparkApplicationBackUpService; +import org.apache.streampark.console.core.service.SparkEffectiveService; +import org.apache.streampark.console.core.service.SparkEnvService; +import org.apache.streampark.console.core.service.SparkSqlService; +import org.apache.streampark.spark.client.proxy.SparkShimsProxy; +import org.apache.streampark.spark.core.util.SparkSqlValidationResult; + +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Propagation; +import org.springframework.transaction.annotation.Transactional; + +import java.lang.reflect.Method; +import java.util.List; +import java.util.Optional; + +@Slf4j +@Service +@Transactional(propagation = Propagation.SUPPORTS, readOnly = true, rollbackFor = Exception.class) +public class SparkSqlServiceImpl extends ServiceImpl + implements + SparkSqlService { + + @Autowired + private SparkEffectiveService effectiveService; + + @Autowired + private SparkApplicationBackUpService backUpService; + + @Autowired + private SparkEnvService sparkEnvService; + + private static final String SPARKSQL_VALIDATOR_CLASS = "org.apache.streampark.spark.core.util.SparkSqlValidator"; + + @Override + public SparkSql getEffective(Long appId, boolean decode) { + SparkSql sparkSql = baseMapper.getEffective(appId); + if (sparkSql != null && decode) { + sparkSql.setSql(DeflaterUtils.unzipString(sparkSql.getSql())); + } + return sparkSql; + } + + @Override + public SparkSql getLatestSparkSql(Long appId, boolean decode) { + Page page = new Page<>(); + page.setCurrent(0).setSize(1).setSearchCount(false); + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkSql::getAppId, appId) + .orderByDesc(SparkSql::getVersion); + + Page sparkSqlPage = baseMapper.selectPage(page, queryWrapper); + return Optional.ofNullable(sparkSqlPage.getRecords()) + .filter(records -> 
!records.isEmpty()) + .map(records -> records.get(0)) + .map( + sparkSql -> { + if (decode) { + sparkSql.setSql(DeflaterUtils.unzipString(sparkSql.getSql())); + } + return sparkSql; + }) + .orElse(null); + } + + @Override + public void create(SparkSql sparkSql) { + Integer version = this.baseMapper.getLatestVersion(sparkSql.getAppId()); + sparkSql.setVersion(version == null ? 1 : version + 1); + String sql = DeflaterUtils.zipString(sparkSql.getSql()); + sparkSql.setSql(sql); + this.save(sparkSql); + this.setCandidate(CandidateTypeEnum.NEW, sparkSql.getAppId(), sparkSql.getId()); + } + + @Override + public void setCandidate(CandidateTypeEnum candidateTypeEnum, Long appId, Long sqlId) { + this.update( + new LambdaUpdateWrapper() + .eq(SparkSql::getAppId, appId) + .set(SparkSql::getCandidate, CandidateTypeEnum.NONE.get())); + + this.update( + new LambdaUpdateWrapper() + .eq(SparkSql::getId, sqlId) + .set(SparkSql::getCandidate, candidateTypeEnum.get())); + } + + @Override + public List listSparkSqlHistory(Long appId) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper() + .eq(SparkSql::getAppId, appId) + .orderByDesc(SparkSql::getVersion); + + List sqlList = this.baseMapper.selectList(queryWrapper); + SparkSql effective = getEffective(appId, false); + if (effective != null) { + sqlList.stream() + .filter(sql -> sql.getId().equals(effective.getId())) + .findFirst() + .ifPresent(sql -> sql.setEffective(true)); + } + return sqlList; + } + + @Override + public SparkSql getCandidate(Long appId, CandidateTypeEnum candidateTypeEnum) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper().eq(SparkSql::getAppId, appId); + if (candidateTypeEnum == null) { + queryWrapper.gt(SparkSql::getCandidate, CandidateTypeEnum.NONE.get()); + } else { + queryWrapper.eq(SparkSql::getCandidate, candidateTypeEnum.get()); + } + return baseMapper.selectOne(queryWrapper); + } + + @Override + public void toEffective(Long appId, Long sqlId) { + effectiveService.saveOrUpdate(appId, EffectiveTypeEnum.SPARKSQL, sqlId); + } + + @Override + public void cleanCandidate(Long id) { + this.update( + new LambdaUpdateWrapper() + .eq(SparkSql::getId, id) + .set(SparkSql::getCandidate, CandidateTypeEnum.NONE.get())); + } + + @Override + public void removeByAppId(Long appId) { + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper().eq(SparkSql::getAppId, appId); + baseMapper.delete(queryWrapper); + } + + @Override + @Transactional(propagation = Propagation.REQUIRES_NEW, rollbackFor = Exception.class) + public void rollback(SparkApplication application) { + SparkSql sql = getCandidate(application.getId(), CandidateTypeEnum.HISTORY); + AssertUtils.notNull(sql); + try { + // check and backup current job + SparkSql effectiveSql = getEffective(application.getId(), false); + AssertUtils.notNull(effectiveSql); + // rollback history sql + backUpService.rollbackSparkSql(application, sql); + } catch (Exception e) { + log.error("Backup and Roll back SparkSql before start failed."); + throw new RuntimeException(e.getMessage()); + } + } + + @Override + public SparkSqlValidationResult verifySql(String sql, Long versionId) { + SparkEnv sparkEnv = sparkEnvService.getById(versionId); + return SparkShimsProxy.proxyVerifySql( + sparkEnv.getSparkVersion(), + classLoader -> { + try { + Class clazz = classLoader.loadClass(SPARKSQL_VALIDATOR_CLASS); + Method method = clazz.getDeclaredMethod("verifySql", String.class); + method.setAccessible(true); + Object result = method.invoke(null, sql); + if (result == null) { + return null; + } + 
return SparkShimsProxy.getObject(this.getClass().getClassLoader(), result); + } catch (Throwable e) { + log.error( + "verifySql invocationTargetException: {}", + ExceptionUtils.stringifyException(e)); + } + return null; + }); + } + + @Override + public List listByTeamId(Long teamId) { + return this.baseMapper.selectSqlsByTeamId(teamId); + } + + @Override + public IPage getPage(Long appId, RestRequest request) { + request.setSortField("version"); + Page page = MybatisPager.getPage(request); + LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper().eq(SparkSql::getAppId, appId); + IPage sqlList = this.baseMapper.selectPage(page, queryWrapper); + SparkSql effectiveSql = baseMapper.getEffective(appId); + if (effectiveSql != null) { + for (SparkSql sql : sqlList.getRecords()) { + if (sql.getId().equals(effectiveSql.getId())) { + sql.setEffective(true); + break; + } + } + } + return sqlList; + } +} diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/utils/AlertTemplateUtils.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/utils/AlertTemplateUtils.java index 9ad9c0e33a..87ccdabca9 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/utils/AlertTemplateUtils.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/utils/AlertTemplateUtils.java @@ -119,8 +119,8 @@ public static AlertTemplate createAlertTemplate(AlertProbeMsg alertProbeMsg) { public static AlertTemplate createAlertTemplate(SparkApplication application, SparkAppStateEnum appState) { return AlertTemplate.builder() .duration(application.getStartTime(), application.getEndTime()) - .jobName(application.getJobName()) - .link(application.getSparkExecutionMode(), application.getJobId()) + .jobName(application.getAppName()) + .link(application.getSparkExecutionMode(), application.getAppId()) .startTime(application.getStartTime()) .endTime(application.getEndTime()) .restart(application.isNeedRestartOnFailed(), application.getRestartCount()) @@ -129,9 +129,9 @@ public static AlertTemplate createAlertTemplate(SparkApplication application, Sp .type(AlertTypeEnum.EMAIL.getCode()) .title( String.format( - "%s %s %s", ALERT_TITLE_PREFIX, application.getJobName(), appState.name())) + "%s %s %s", ALERT_TITLE_PREFIX, application.getAppName(), appState.name())) .subject( - String.format("%s %s %s", ALERT_SUBJECT_PREFIX, application.getJobName(), appState)) + String.format("%s %s %s", ALERT_SUBJECT_PREFIX, application.getAppName(), appState)) .status(appState.name()) .build(); } diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/watcher/SparkAppHttpWatcher.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/watcher/SparkAppHttpWatcher.java index 86b541407d..b2433eeda7 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/watcher/SparkAppHttpWatcher.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/watcher/SparkAppHttpWatcher.java @@ -19,15 +19,13 @@ import org.apache.streampark.common.util.YarnUtils; import org.apache.streampark.console.base.util.JacksonUtils; -import org.apache.streampark.console.base.util.Tuple2; -import org.apache.streampark.console.base.util.Tuple3; import 
org.apache.streampark.console.core.bean.AlertTemplate; import org.apache.streampark.console.core.entity.SparkApplication; import org.apache.streampark.console.core.enums.SparkAppStateEnum; import org.apache.streampark.console.core.enums.SparkOptionStateEnum; import org.apache.streampark.console.core.enums.StopFromEnum; import org.apache.streampark.console.core.metrics.spark.Job; -import org.apache.streampark.console.core.metrics.spark.SparkExecutor; +import org.apache.streampark.console.core.metrics.spark.SparkApplicationSummary; import org.apache.streampark.console.core.metrics.yarn.YarnAppInfo; import org.apache.streampark.console.core.service.alert.AlertService; import org.apache.streampark.console.core.service.application.SparkApplicationActionService; @@ -59,7 +57,6 @@ import java.util.Date; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; @@ -206,25 +203,24 @@ private void getStateFromYarn(SparkApplication application) throws Exception { } if (SparkAppStateEnum.isEndState(sparkAppStateEnum.getValue())) { log.info( - "[StreamPark][SparkAppHttpWatcher] getStateFromYarn, app {} was ended, jobId is {}, state is {}", + "[StreamPark][SparkAppHttpWatcher] getStateFromYarn, app {} was ended, appId is {}, state is {}", application.getId(), - application.getJobId(), + application.getAppId(), sparkAppStateEnum); application.setEndTime(new Date()); } if (SparkAppStateEnum.RUNNING == sparkAppStateEnum) { - Tuple3 resourceStatus = getResourceStatus(application); - double memoryUsed = resourceStatus.t1; - double maxMemory = resourceStatus.t2; - double totalCores = resourceStatus.t3; - log.info( - "[StreamPark][SparkAppHttpWatcher] getStateFromYarn, app {} was running, jobId is {}, memoryUsed: {}MB, maxMemory: {}MB, totalCores: {}", - application.getId(), - application.getJobId(), - String.format("%.2f", memoryUsed), - String.format("%.2f", maxMemory), - totalCores); - // TODO: Modify the table structure to persist the results + SparkApplicationSummary summary; + try { + summary = httpStageAndTaskStatus(application); + summary.setUsedMemory(Long.parseLong(yarnAppInfo.getApp().getAllocatedMB())); + summary.setUsedVCores(Long.parseLong(yarnAppInfo.getApp().getAllocatedVCores())); + application.fillRunningMetrics(summary); + } catch (IOException e) { + // This may happen when the job is finished right after the job status is obtained from yarn. + log.warn( + "[StreamPark][SparkAppHttpWatcher] getStateFromYarn, fetch spark job status failed. The job may have already finished."); + } } application.setState(sparkAppStateEnum.getValue()); cleanOptioning(optionStateEnum, application.getId()); @@ -238,63 +234,19 @@ private void getStateFromYarn(SparkApplication application) throws Exception { } } } catch (Exception e) { - throw new RuntimeException("[StreamPark][SparkAppHttpWatcher] getStateFromYarn failed!"); + throw new RuntimeException("[StreamPark][SparkAppHttpWatcher] getStateFromYarn failed!", e); } } } - /** - * Calculate spark task progress from Spark rest api. (proxyed by yarn) Only available when yarn - * application status is RUNNING.
- * - * @param application - * @return task progress - * @throws Exception - */ - private double getTasksProgress(SparkApplication application) throws Exception { - Job[] jobs = httpJobsStatus(application); - if (jobs.length == 0) { - return 0.0; - } - Optional> jobsSumOption = - Arrays.stream(jobs) - .map(job -> new Tuple2<>(job.getNumCompletedTasks(), job.getNumTasks())) - .reduce((val1, val2) -> new Tuple2<>(val1.t1 + val2.t1, val1.t2 + val2.t2)); - Tuple2 jobsSum = jobsSumOption.get(); - return jobsSum.t1 * 1.0 / jobsSum.t2; - } - - private Tuple3 getResourceStatus(SparkApplication application) throws Exception { - SparkExecutor[] executors = httpExecutorsStatus(application); - if (executors.length == 0) { - return new Tuple3<>(0.0, 0.0, 0L); - } - SparkExecutor totalExecutor = - Arrays.stream(executors) - .reduce( - (e1, e2) -> { - SparkExecutor temp = new SparkExecutor(); - temp.setMemoryUsed(e1.getMemoryUsed() + e2.getMemoryUsed()); - temp.setMaxMemory(e1.getMaxMemory() + e2.getMaxMemory()); - temp.setTotalCores(e1.getTotalCores() + e2.getTotalCores()); - return temp; - }) - .get(); - return new Tuple3<>( - totalExecutor.getMemoryUsed() * 1.0 / 1024 / 1024, - totalExecutor.getMaxMemory() * 1.0 / 1024 / 1024, - totalExecutor.getTotalCores()); - } - private void doPersistMetrics(SparkApplication application, boolean stopWatch) { if (SparkAppStateEnum.isEndState(application.getState())) { - application.setOverview(null); - application.setTotalTM(null); - application.setTotalSlot(null); - application.setTotalTask(null); - application.setAvailableSlot(null); - application.setJmMemory(null); - application.setTmMemory(null); + application.setUsedMemory(null); + application.setUsedVCores(null); + application.setNumTasks(null); + application.setNumCompletedTasks(null); + application.setNumStages(null); + application.setNumCompletedStages(null); unWatching(application.getId()); } else if (stopWatch) { unWatching(application.getId()); @@ -313,7 +265,7 @@ private void cleanOptioning(SparkOptionStateEnum optionStateEnum, Long key) { /** set current option state */ public static void setOptionState(Long appId, SparkOptionStateEnum state) { - log.info("[StreamPark][SparkAppHttpWatcher] setOptioning"); + log.info("[StreamPark][SparkAppHttpWatcher] setOptioning"); OPTIONING.put(appId, state); if (SparkOptionStateEnum.STOPPING == state) { STOP_FROM_MAP.put(appId, StopFromEnum.STREAMPARK); @@ -347,22 +299,37 @@ public static Collection getWatchingApps() { } private YarnAppInfo httpYarnAppInfo(SparkApplication application) throws Exception { - String reqURL = "ws/v1/cluster/apps/".concat(application.getJobId()); + String reqURL = "ws/v1/cluster/apps/".concat(application.getAppId()); return yarnRestRequest(reqURL, YarnAppInfo.class); } - private Job[] httpJobsStatus(SparkApplication application) throws Exception { + private Job[] httpJobsStatus(SparkApplication application) throws IOException { String format = "proxy/%s/api/v1/applications/%s/jobs"; - String reqURL = String.format(format, application.getJobId(), application.getJobId()); + String reqURL = String.format(format, application.getAppId(), application.getAppId()); return yarnRestRequest(reqURL, Job[].class); } - private SparkExecutor[] httpExecutorsStatus(SparkApplication application) throws Exception { - // "executor" is used for active executors only. - // "allexecutor" is used for all executors including the dead. 
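
The new RUNNING-state branch below folds the per-job counters from Spark's REST API into a single summary and then attaches the memory/vcore allocation reported by YARN. A minimal Scala sketch of that aggregation, using simplified stand-ins for the Job and SparkApplicationSummary entities (field names here are illustrative, not the actual console classes):

// Illustrative stand-ins only; the real entities live in the console module.
case class Job(numTasks: Long, numCompletedTasks: Long, stageIds: Seq[Int], numCompletedStages: Long)

case class RunningSummary(
    numTasks: Long,
    numCompletedTasks: Long,
    numStages: Long,
    numCompletedStages: Long,
    usedMemoryMb: Long,
    usedVCores: Long)

def summarize(jobs: Array[Job], allocatedMb: Long, allocatedVCores: Long): RunningSummary =
  jobs.foldLeft(RunningSummary(0, 0, 0, 0, allocatedMb, allocatedVCores)) { (acc, job) =>
    acc.copy(
      numTasks = acc.numTasks + job.numTasks,
      numCompletedTasks = acc.numCompletedTasks + job.numCompletedTasks,
      numStages = acc.numStages + job.stageIds.size,
      // completed stages accumulate into their own counter
      numCompletedStages = acc.numCompletedStages + job.numCompletedStages)
  }

An empty jobs array simply yields the YARN allocation with zeroed task/stage counters, matching the null guard in the watcher.
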
- String format = "proxy/%s/api/v1/applications/%s/executors"; - String reqURL = String.format(format, application.getJobId(), application.getJobId()); - return yarnRestRequest(reqURL, SparkExecutor[].class); + /** + * Calculate spark stage and task metric from yarn rest api. + * Only available when yarn application status is RUNNING. + * + * @param application + * @return task progress + * @throws Exception + */ + private SparkApplicationSummary httpStageAndTaskStatus(SparkApplication application) throws IOException { + Job[] jobs = httpJobsStatus(application); + SparkApplicationSummary summary = new SparkApplicationSummary(0L, 0L, 0L, 0L, null, null); + if (jobs == null) { + return summary; + } + Arrays.stream(jobs).forEach(job -> { + summary.setNumTasks(job.getNumTasks() + summary.getNumTasks()); + summary.setNumCompletedTasks(job.getNumCompletedTasks() + summary.getNumCompletedTasks()); + summary.setNumStages(job.getStageIds().size() + summary.getNumStages()); + summary.setNumStages(job.getNumCompletedStages() + summary.getNumCompletedStages()); + }); + return summary; } private T yarnRestRequest(String url, Class clazz) throws IOException { diff --git a/streampark-console/streampark-console-service/src/main/resources/db/schema-h2.sql b/streampark-console/streampark-console-service/src/main/resources/db/schema-h2.sql index 037a4ab0e3..349a5e1168 100644 --- a/streampark-console/streampark-console-service/src/main/resources/db/schema-h2.sql +++ b/streampark-console/streampark-console-service/src/main/resources/db/schema-h2.sql @@ -491,102 +491,173 @@ create table if not exists `t_yarn_queue` ( primary key (`id`) ); +-- ---------------------------- +-- Table structure for t_flink_catalog +-- ---------------------------- +create table if not exists t_flink_catalog ( + `id` BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1), + `team_id` bigint not null, + `user_id` bigint default null, + `catalog_type` varchar(255) not NULL, + `catalog_name` VARCHAR(255) NOT NULL, + `configuration` text, + `create_time` TIMESTAMP WITHOUT TIME ZONE DEFAULT NULL, + `update_time` TIMESTAMP WITHOUT TIME ZONE DEFAULT NULL, + CONSTRAINT uniq_catalog_name UNIQUE (`catalog_name`) +); + -- ---------------------------- -- Table structure for t_spark_env -- ---------------------------- create table if not exists `t_spark_env` ( - `id` bigint generated by default as identity not null, - `spark_name` varchar(128) not null comment 'spark instance name', - `spark_home` varchar(255) not null comment 'spark home path', - `version` varchar(64) not null comment 'spark version', - `scala_version` varchar(64) not null comment 'scala version of spark', - `spark_conf` text not null comment 'spark-conf', - `is_default` tinyint not null default 0 comment 'whether default version or not', - `description` varchar(255) default null comment 'description', - `create_time` datetime not null default current_timestamp comment 'create time', - primary key(`id`), - unique (`spark_name`) - ); + `id` bigint generated by default as identity not null, + `spark_name` varchar(128) not null comment 'spark instance name', + `spark_home` varchar(255) not null comment 'spark home path', + `version` varchar(64) not null comment 'spark version', + `scala_version` varchar(64) not null comment 'scala version of spark', + `spark_conf` text not null comment 'spark-conf', + `is_default` tinyint not null default 0 comment 'whether default version or not', + `description` varchar(255) default null comment 'description', + `create_time` datetime 
not null default current_timestamp comment 'create time', + primary key(`id`), + unique (`spark_name`) +); + -- ---------------------------- -- Table structure for t_spark_app -- ---------------------------- create table if not exists `t_spark_app` ( - `id` bigint generated by default as identity not null, - `team_id` bigint not null, - `job_type` tinyint default null, - `execution_mode` tinyint default null, - `resource_from` tinyint default null, - `project_id` bigint default null, - `job_name` varchar(255) default null, - `module` varchar(255) default null, - `jar` varchar(255) default null, - `jar_check_sum` bigint default null, - `main_class` varchar(255) default null, - `args` text, - `options` text, - `hot_params` text , - `user_id` bigint default null, - `app_id` varchar(64) default null, - `app_type` tinyint default null, - `duration` bigint default null, - `job_id` varchar(64) default null, - `job_manager_url` varchar(255) default null, - `version_id` bigint default null, - `cluster_id` varchar(45) default null, - `k8s_name` varchar(63) default null, - `k8s_namespace` varchar(63) default null, - `spark_image` varchar(128) default null, - `state` int default null, - `restart_size` int default null, - `restart_count` int default null, - `cp_threshold` int default null, - `cp_max_failure_interval` int default null, - `cp_failure_rate_interval` int default null, - `cp_failure_action` tinyint default null, - `dynamic_properties` text , - `description` varchar(255) default null, - `resolve_order` tinyint default null, - `k8s_rest_exposed_type` tinyint default null, - `jm_memory` int default null, - `tm_memory` int default null, - `total_task` int default null, - `total_tm` int default null, - `total_slot` int default null, - `available_slot` int default null, - `option_state` tinyint default null, - `tracking` tinyint default null, - `create_time` datetime not null default current_timestamp comment 'create time', - `modify_time` datetime not null default current_timestamp comment 'modify time', - `option_time` datetime default null, - `release` tinyint default 1, - `build` tinyint default 1, - `start_time` datetime default null, - `end_time` datetime default null, - `alert_id` bigint default null, - `k8s_pod_template` text , - `k8s_jm_pod_template` text , - `k8s_tm_pod_template` text , - `k8s_hadoop_integration` tinyint default 0, - `spark_cluster_id` bigint default null, - `ingress_template` text , - `default_mode_ingress` text , - `tags` varchar(500) default null, - `hadoop_user` varchar(500) default null, - primary key(`id`) - ); - - -- ---------------------------- - -- Table structure for t_flink_app - -- ---------------------------- - create table if not exists t_flink_catalog ( - `id` BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1), - `team_id` bigint not null, - `user_id` bigint default null, - `catalog_type` varchar(255) not NULL, - `catalog_name` VARCHAR(255) NOT NULL, - `configuration` text, - `create_time` TIMESTAMP WITHOUT TIME ZONE DEFAULT NULL, - `update_time` TIMESTAMP WITHOUT TIME ZONE DEFAULT NULL, - CONSTRAINT uniq_catalog_name UNIQUE (`catalog_name`) - ); + `id` bigint generated by default as identity not null, + `team_id` bigint not null, + `job_type` tinyint default null comment '(1)custom code(2)spark SQL', + `app_type` tinyint default null comment '(1)Apache Spark(2)StreamPark Spark', + `version_id` bigint default null comment 'spark version', + `app_name` varchar(255) default null comment 'spark.app.name', + `execution_mode` tinyint default 
null comment 'spark.submit.deployMode(1)cluster(2)client', + `resource_from` tinyint default null, + `project_id` bigint default null, + `module` varchar(255) default null, + `main_class` varchar(255) default null comment 'The entry point for your application (e.g. org.apache.spark.examples.SparkPi)', + `jar` varchar(255) default null, + `jar_check_sum` bigint default null, + `app_properties` text comment 'Arbitrary Spark configuration property in key=value format (e.g. spark.driver.cores=1)', + `app_args` text comment 'Arguments passed to the main method of your main class', + `app_id` varchar(64) default null comment '(1)application_id on yarn(2)driver_pod_name on k8s', + `yarn_queue` varchar(128) default null, + `k8s_master_url` varchar(128) default null, + `k8s_container_image` varchar(128) default null, + `k8s_image_pull_policy` tinyint default 1, + `k8s_service_account` varchar(64) default null, + `k8s_namespace` varchar(64) default null, + `hadoop_user` varchar(64) default null, + `restart_size` int default null, + `restart_count` int default null, + `state` int default null, + `options` text, + `option_state` tinyint default null, + `option_time` datetime default null, + `user_id` bigint default null, + `description` varchar(255) default null, + `tracking` tinyint default null, + `release` tinyint default 1, + `build` tinyint default 1, + `alert_id` bigint default null, + `create_time` datetime default null, + `modify_time` datetime default null, + `start_time` datetime default null, + `end_time` datetime default null, + `duration` bigint default null, + `tags` varchar(500) default null, + `driver_cores` varchar(64) default null, + `driver_memory` varchar(64) default null, + `executor_cores` varchar(64) default null, + `executor_memory` varchar(64) default null, + `executor_max_nums` varchar(64) default null, + `num_tasks` bigint default null, + `num_completed_tasks` bigint default null, + `num_stages` bigint default null, + `num_completed_stages` bigint default null, + `used_memory` bigint default null, + `used_v_cores` bigint default null, + primary key(`id`) +); + + +-- ---------------------------- +-- Table structure for t_spark_log +-- ---------------------------- +create table if not exists `t_spark_log` ( + `id` bigint generated by default as identity not null, + `app_id` bigint default null, + `spark_app_id` varchar(64) default null, + `track_url` varchar(255) default null, + `success` tinyint default null, + `exception` text , + `option_time` datetime default null, + `option_name` tinyint default null, + `user_id` bigint default null comment 'operator user id', + primary key(`id`) +); + + +-- ---------------------------- +-- Table structure for t_spark_effective +-- ---------------------------- +create table if not exists `t_spark_effective` ( + `id` bigint generated by default as identity not null, + `app_id` bigint not null, + `target_type` tinyint not null comment '1) config 2) spark sql', + `target_id` bigint not null comment 'configId or sqlId', + `create_time` datetime default null comment 'create time', + primary key(`id`), + unique (`app_id`,`target_type`) +); + + +-- ---------------------------- +-- Table structure for t_spark_config +-- ---------------------------- +create table if not exists `t_spark_config` ( + `id` bigint generated by default as identity not null, + `app_id` bigint not null, + `format` tinyint not null default 0, + `version` int not null, + `latest` tinyint not null default 0, + `content` text not null, + `create_time` datetime default null 
comment 'create time', + primary key(`id`) +); + + +-- ---------------------------- +-- Table structure for t_spark_sql +-- ---------------------------- +create table if not exists `t_spark_sql` ( + `id` bigint generated by default as identity not null, + `app_id` bigint default null, + `sql` text , + `team_resource` varchar(64) default null, + `dependency` text , + `version` int default null, + `candidate` tinyint not null default 1, + `create_time` datetime default null comment 'create time', + primary key(`id`) +); + + +-- ---------------------------- +-- Table structure for t_spark_app_backup +-- ---------------------------- +create table if not exists `t_spark_app_backup` ( + `id` bigint generated by default as identity not null, + `app_id` bigint default null, + `sql_id` bigint default null, + `config_id` bigint default null, + `version` int default null, + `path` varchar(128) default null, + `description` varchar(255) default null, + `create_time` datetime default null comment 'create time', + `modify_time` datetime default null comment 'modify time', + primary key(`id`) +); diff --git a/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationConfigMapper.xml b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationConfigMapper.xml new file mode 100644 index 0000000000..3cd6cb74e4 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationConfigMapper.xml @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationMapper.xml b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationMapper.xml index 80217cf9dd..b58fe3a4b7 100644 --- a/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationMapper.xml +++ b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkApplicationMapper.xml @@ -20,62 +20,57 @@ - - - - - - - - - - - - - - - - + + + - + + + + - - - - - - - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + @@ -83,34 +78,6 @@ set option_state = 0 - - - - - - - - - - - update t_spark_app - - job_id=#{app.jobId}, + + app_id=#{app.appId}, tracking=#{app.tracking}, @@ -295,31 +204,31 @@ - total_tm=null, - total_slot=null, - total_task=null, - available_slot=null, - jm_memory=null, - tm_memory=null, + num_tasks=null, + num_completed_tasks=null, + num_stages=null, + num_completed_stages=null, + used_memory=null, + used_v_cores=null, - - total_tm=#{app.totalTM}, + + num_tasks=#{app.numTasks}, - - total_slot=#{app.totalSlot}, + + num_completed_tasks=#{app.numCompletedTasks}, - - total_task=#{app.totalTask}, + + num_stages=#{app.numStages}, - - available_slot=#{app.availableSlot}, + + num_completed_stages=#{app.numCompletedStages}, - - jm_memory=#{app.jmMemory}, + + used_memory=#{app.usedMemory}, - - tm_memory=#{app.tmMemory}, + + used_v_cores=#{app.usedVCores}, diff --git a/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkEffectiveMapper.xml b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkEffectiveMapper.xml new file mode 100644 index 0000000000..88f4f1d5b1 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkEffectiveMapper.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + diff --git 
a/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkSqlMapper.xml b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkSqlMapper.xml new file mode 100644 index 0000000000..87f7a9010f --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/resources/mapper/core/SparkSqlMapper.xml @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/streampark-console/streampark-console-service/src/main/resources/spark-application.conf b/streampark-console/streampark-console-service/src/main/resources/spark-application.conf new file mode 100644 index 0000000000..c9d30f409b --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/resources/spark-application.conf @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +spark: #@see: https://spark.apache.org/docs/latest/configuration.html + # scheduling + driver: + cores: + memory: + memoryOverhead: + memoryOverheadFactor: + executor: + instances: 4 + cores: + memory: + memoryOverhead: + memoryOverheadFactor: + # dynamic allocation + dynamicAllocation: + enabled: + initialExecutors: + minExecutors: + maxExecutors: + # memory management + memory: + fraction: + storageFraction: + offHeap.enabled: + offHeap.size: + # shuffle + shuffle: + file.buffer: + memoryFraction: + io.maxRetries: + io.retryWait: + compress: + consolidateFiles: + sort.bypassMergeThreshold: + # compression and serialization + broadcast.compress: + checkpoint.compress: + rdd.compress: + io.compression.codec: + serializer: +app: # user's parameter + #$key: $value diff --git a/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/BuildRequest.scala b/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/BuildRequest.scala index e85066c73c..344b0b6048 100644 --- a/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/BuildRequest.scala +++ b/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/BuildRequest.scala @@ -18,7 +18,7 @@ package org.apache.streampark.flink.packer.pipeline import org.apache.streampark.common.conf.{FlinkVersion, Workspace} -import org.apache.streampark.common.enums.{FlinkDevelopmentMode, FlinkExecutionMode} +import org.apache.streampark.common.enums.{FlinkDevelopmentMode, FlinkExecutionMode, SparkDevelopmentMode} import org.apache.streampark.flink.kubernetes.model.K8sPodTemplates import org.apache.streampark.flink.packer.docker.DockerConf import org.apache.streampark.flink.packer.maven.DependencyInfo @@ -128,6 +128,6 @@ case class SparkYarnApplicationBuildRequest( mainClass: String, localWorkspace: String, yarnProvidedPath: String, - 
developmentMode: FlinkDevelopmentMode, + developmentMode: SparkDevelopmentMode, dependencyInfo: DependencyInfo) extends BuildParam diff --git a/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/impl/SparkYarnApplicationBuildPipeline.scala b/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/impl/SparkYarnApplicationBuildPipeline.scala index 9b91d1dcb8..1068a82584 100644 --- a/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/impl/SparkYarnApplicationBuildPipeline.scala +++ b/streampark-flink/streampark-flink-packer/src/main/scala/org/apache/streampark/flink/packer/pipeline/impl/SparkYarnApplicationBuildPipeline.scala @@ -18,7 +18,7 @@ package org.apache.streampark.flink.packer.pipeline.impl import org.apache.streampark.common.conf.Workspace -import org.apache.streampark.common.enums.FlinkDevelopmentMode +import org.apache.streampark.common.enums.SparkDevelopmentMode import org.apache.streampark.common.fs.{FsOperator, HdfsOperator, LfsOperator} import org.apache.streampark.common.util.Implicits._ import org.apache.streampark.flink.packer.maven.MavenTool @@ -46,7 +46,7 @@ class SparkYarnApplicationBuildPipeline(request: SparkYarnApplicationBuildReques override protected def buildProcess(): SimpleBuildResponse = { execStep(1) { request.developmentMode match { - case FlinkDevelopmentMode.FLINK_SQL => + case SparkDevelopmentMode.SPARK_SQL => LfsOperator.mkCleanDirs(request.localWorkspace) HdfsOperator.mkCleanDirs(request.yarnProvidedPath) case _ => @@ -57,7 +57,7 @@ class SparkYarnApplicationBuildPipeline(request: SparkYarnApplicationBuildReques val mavenJars = execStep(2) { request.developmentMode match { - case FlinkDevelopmentMode.FLINK_SQL => + case SparkDevelopmentMode.SPARK_SQL => val mavenArts = MavenTool.resolveArtifacts(request.dependencyInfo.mavenArts) mavenArts.map(_.getAbsolutePath) ++ request.dependencyInfo.extJarLibs diff --git a/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/StopRequest.scala b/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/StopRequest.scala index 1adcc862e8..5a73991b7b 100644 --- a/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/StopRequest.scala +++ b/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/StopRequest.scala @@ -28,4 +28,4 @@ case class StopRequest( sparkVersion: SparkVersion, executionMode: SparkExecutionMode, @Nullable properties: JavaMap[String, String], - jobId: String) + appId: String) diff --git a/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitRequest.scala b/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitRequest.scala index 3c512d5398..e92582a2e3 100644 --- a/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitRequest.scala +++ b/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitRequest.scala @@ -36,15 +36,15 @@ import java.nio.file.Files case class SubmitRequest( sparkVersion: 
SparkVersion, executionMode: SparkExecutionMode, - properties: JavaMap[String, String], sparkYaml: String, developmentMode: SparkDevelopmentMode, id: Long, - jobId: String, appName: String, + mainClass: String, appConf: String, + appProperties: JavaMap[String, String], + appArgs: JavaList[String], applicationType: ApplicationType, - args: String, @Nullable hadoopUser: String, @Nullable buildResult: BuildResult, @Nullable extraParameter: JavaMap[String, Any]) { @@ -53,23 +53,18 @@ case class SubmitRequest( "spark.driver.cores" -> "1", "spark.driver.memory" -> "1g", "spark.executor.cores" -> "1", - "spark.executor.memory" -> "1g") + "spark.executor.memory" -> "1g", + "spark.executor.instances" -> "2") - private[this] lazy val appProperties: Map[String, String] = getParameterMap( + lazy val sparkParameterMap: Map[String, String] = getParameterMap( KEY_SPARK_PROPERTY_PREFIX) lazy val appMain: String = this.developmentMode match { case SparkDevelopmentMode.SPARK_SQL => Constant.STREAMPARK_SPARKSQL_CLIENT_CLASS - case SparkDevelopmentMode.CUSTOM_CODE => appProperties(KEY_FLINK_APPLICATION_MAIN_CLASS) + case SparkDevelopmentMode.CUSTOM_CODE | SparkDevelopmentMode.PYSPARK => mainClass case SparkDevelopmentMode.UNKNOWN => throw new IllegalArgumentException("Unknown deployment Mode") } - lazy val effectiveAppName: String = if (this.appName == null) { - appProperties(KEY_FLINK_APP_NAME) - } else { - this.appName - } - lazy val userJarPath: String = { executionMode match { case _ => @@ -78,10 +73,6 @@ case class SubmitRequest( } } - def hasProp(key: String): Boolean = MapUtils.isNotEmpty(properties) && properties.containsKey(key) - - def getProp(key: String): Any = properties.get(key) - def hasExtra(key: String): Boolean = MapUtils.isNotEmpty(extraParameter) && extraParameter.containsKey(key) def getExtra(key: String): Any = extraParameter.get(key) @@ -124,7 +115,6 @@ case class SubmitRequest( map .filter(_._1.startsWith(prefix)) .filter(_._2.nonEmpty) - .map(x => x._1.drop(prefix.length) -> x._2) } } @@ -163,11 +153,11 @@ case class SubmitRequest( case _ => if (this.buildResult == null) { throw new Exception( - s"[spark-submit] current job: ${this.effectiveAppName} was not yet built, buildResult is empty") + s"[spark-submit] current job: $appName was not yet built, buildResult is empty") } if (!this.buildResult.pass) { throw new Exception( - s"[spark-submit] current job ${this.effectiveAppName} build failed, please check") + s"[spark-submit] current job $appName build failed, please check") } } } diff --git a/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitResponse.scala b/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitResponse.scala index da5720a2df..0b58da799c 100644 --- a/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitResponse.scala +++ b/streampark-spark/streampark-spark-client/streampark-spark-client-api/src/main/scala/org/apache/streampark/spark/client/bean/SubmitResponse.scala @@ -20,5 +20,5 @@ package org.apache.streampark.spark.client.bean import org.apache.streampark.common.util.Implicits.JavaMap case class SubmitResponse( - clusterId: String, - sparkConfig: JavaMap[String, String]) + var sparkAppId: String, + sparkProperties: JavaMap[String, String]) diff --git 
a/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/impl/YarnClient.scala b/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/impl/YarnClient.scala index 60648c15d1..15cf4d60b2 100644 --- a/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/impl/YarnClient.scala +++ b/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/impl/YarnClient.scala @@ -17,6 +17,7 @@ package org.apache.streampark.spark.client.impl +import org.apache.streampark.common.conf.ConfigKeys.{KEY_SPARK_YARN_AM_NODE_LABEL, KEY_SPARK_YARN_EXECUTOR_NODE_LABEL, KEY_SPARK_YARN_QUEUE, KEY_SPARK_YARN_QUEUE_LABEL, KEY_SPARK_YARN_QUEUE_NAME} import org.apache.streampark.common.enums.SparkExecutionMode import org.apache.streampark.common.util.HadoopUtils import org.apache.streampark.common.util.Implicits._ @@ -36,20 +37,20 @@ object YarnClient extends SparkClientTrait { private lazy val sparkHandles = new ConcurrentHashMap[String, SparkAppHandle]() override def doStop(stopRequest: StopRequest): StopResponse = { - val sparkAppHandle = sparkHandles.remove(stopRequest.jobId) + val sparkAppHandle = sparkHandles.remove(stopRequest.appId) if (sparkAppHandle != null) { Try(sparkAppHandle.kill()) match { case Success(_) => - logger.info(s"[StreamPark][Spark][YarnClient] spark job: ${stopRequest.jobId} is stopped successfully.") + logger.info(s"[StreamPark][Spark][YarnClient] spark job: ${stopRequest.appId} is stopped successfully.") StopResponse(null) case Failure(e) => logger.error("[StreamPark][Spark][YarnClient] sparkAppHandle kill failed. Try kill by yarn", e) - yarnKill(stopRequest.jobId) + yarnKill(stopRequest.appId) StopResponse(null) } } else { - logger.warn(s"[StreamPark][Spark][YarnClient] spark job: ${stopRequest.jobId} is not existed. Try kill by yarn") - yarnKill(stopRequest.jobId) + logger.warn(s"[StreamPark][Spark][YarnClient] spark job: ${stopRequest.appId} is not existed. 
Try kill by yarn") + yarnKill(stopRequest.appId) StopResponse(null) } } @@ -73,11 +74,11 @@ object YarnClient extends SparkClientTrait { // 3) launch Try(launch(launcher)) match { case Success(handle: SparkAppHandle) => - logger.info(s"[StreamPark][Spark][YarnClient] spark job: ${submitRequest.effectiveAppName} is submit successful, " + + logger.info(s"[StreamPark][Spark][YarnClient] spark job: ${submitRequest.appName} is submit successful, " + s"appid: ${handle.getAppId}, " + s"state: ${handle.getState}") sparkHandles += handle.getAppId -> handle - SubmitResponse(handle.getAppId, submitRequest.properties) + SubmitResponse(handle.getAppId, submitRequest.appProperties) case Failure(e) => throw e } } @@ -89,7 +90,7 @@ object YarnClient extends SparkClientTrait { override def infoChanged(sparkAppHandle: SparkAppHandle): Unit = {} override def stateChanged(handle: SparkAppHandle): Unit = { if (handle.getAppId != null) { - logger.info("{} stateChanged :{}", Array(handle.getAppId, handle.getState.toString)) + logger.info(s"${handle.getAppId} stateChanged : ${handle.getState.toString}") } else { logger.info("stateChanged :{}", handle.getState.toString) } @@ -110,7 +111,7 @@ object YarnClient extends SparkClientTrait { .setSparkHome(submitRequest.sparkVersion.sparkHome) .setAppResource(submitRequest.userJarPath) .setMainClass(submitRequest.appMain) - .setAppName(submitRequest.effectiveAppName) + .setAppName(submitRequest.appName) .setConf( "spark.yarn.jars", submitRequest.hdfsWorkspace.sparkLib + "/*.jar") @@ -119,24 +120,40 @@ object YarnClient extends SparkClientTrait { .setDeployMode(submitRequest.executionMode match { case SparkExecutionMode.YARN_CLIENT => "client" case SparkExecutionMode.YARN_CLUSTER => "cluster" - case _ => throw new IllegalArgumentException("[StreamPark][Spark] Invalid spark on yarn deployMode, only support \"client\" and \"cluster\".") + case _ => + throw new IllegalArgumentException("[StreamPark][Spark][YarnClient] Invalid spark on yarn deployMode, only support \"client\" and \"cluster\".") }) } private def setSparkConfig(submitRequest: SubmitRequest, sparkLauncher: SparkLauncher): Unit = { logger.info("[StreamPark][Spark][YarnClient] set spark configuration.") - // 1) set spark conf - submitRequest.properties.foreach(prop => { + // 1) put yarn queue + if (SparkExecutionMode.isYarnMode(submitRequest.executionMode)) { + setYarnQueue(submitRequest) + } + + // 2) set spark conf + submitRequest.appProperties.foreach(prop => { val k = prop._1 val v = prop._2 logInfo(s"| $k : $v") sparkLauncher.setConf(k, v) }) - // 2) appArgs... 
+ // 3) set spark args + submitRequest.appArgs.foreach(sparkLauncher.addAppArgs(_)) if (submitRequest.hasExtra("sql")) { sparkLauncher.addAppArgs("--sql", submitRequest.getExtra("sql").toString) } } + protected def setYarnQueue(submitRequest: SubmitRequest): Unit = { + if (submitRequest.hasExtra(KEY_SPARK_YARN_QUEUE_NAME)) { + submitRequest.appProperties.put(KEY_SPARK_YARN_QUEUE, submitRequest.getExtra(KEY_SPARK_YARN_QUEUE_NAME).asInstanceOf[String]) + } + if (submitRequest.hasExtra(KEY_SPARK_YARN_QUEUE_LABEL)) { + submitRequest.appProperties.put(KEY_SPARK_YARN_AM_NODE_LABEL, submitRequest.getExtra(KEY_SPARK_YARN_QUEUE_LABEL).asInstanceOf[String]) + submitRequest.appProperties.put(KEY_SPARK_YARN_EXECUTOR_NODE_LABEL, submitRequest.getExtra(KEY_SPARK_YARN_QUEUE_LABEL).asInstanceOf[String]) + } + } } diff --git a/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/trait/SparkClientTrait.scala b/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/trait/SparkClientTrait.scala index 2ef438dc05..5677e62774 100644 --- a/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/trait/SparkClientTrait.scala +++ b/streampark-spark/streampark-spark-client/streampark-spark-client-core/src/main/scala/org/apache/streampark/spark/client/trait/SparkClientTrait.scala @@ -32,13 +32,13 @@ trait SparkClientTrait extends Logger { |--------------------------------------- spark job start ----------------------------------- | userSparkHome : ${submitRequest.sparkVersion.sparkHome} | sparkVersion : ${submitRequest.sparkVersion.version} - | appName : ${submitRequest.effectiveAppName} + | appName : ${submitRequest.appName} | devMode : ${submitRequest.developmentMode.name()} | execMode : ${submitRequest.executionMode.name()} | applicationType : ${submitRequest.applicationType.getName} - | properties : ${submitRequest.properties.mkString(" ")} - | args : ${submitRequest.args} + | appArgs : ${submitRequest.appArgs} | appConf : ${submitRequest.appConf} + | properties : ${submitRequest.appProperties.mkString(",")} |------------------------------------------------------------------------------------------- |""".stripMargin) @@ -66,7 +66,7 @@ trait SparkClientTrait extends Logger { |----------------------------------------- spark job stop ---------------------------------- | userSparkHome : ${stopRequest.sparkVersion.sparkHome} | sparkVersion : ${stopRequest.sparkVersion.version} - | jobId : ${stopRequest.jobId} + | appId : ${stopRequest.appId} |------------------------------------------------------------------------------------------- |""".stripMargin) @@ -80,8 +80,8 @@ trait SparkClientTrait extends Logger { def doStop(stopRequest: StopRequest): StopResponse private def prepareConfig(submitRequest: SubmitRequest): Unit = { - // 1) set default config - val userConfig = submitRequest.properties.filter(c => { + // 1) filter illegal configuration key + val userConfig = submitRequest.appProperties.filter(c => { val k = c._1 if (k.startsWith("spark.")) { true @@ -90,10 +90,14 @@ trait SparkClientTrait extends Logger { false } }) - val defaultConfig = submitRequest.DEFAULT_SUBMIT_PARAM.filter(c => !userConfig.containsKey(c._1)) - submitRequest.properties.clear() - submitRequest.properties.putAll(userConfig) - submitRequest.properties.putAll(defaultConfig) + val defaultConfig = submitRequest.DEFAULT_SUBMIT_PARAM.filter(c => 
!userConfig.containsKey(c._1) && !submitRequest.sparkParameterMap.containsKey(c._1)) + submitRequest.appProperties.clear() + // 2) put default configuration + submitRequest.appProperties.putAll(defaultConfig) + // 3) put configuration from template (spark-application.conf) + submitRequest.appProperties.putAll(submitRequest.sparkParameterMap) + // 4) put configuration from appProperties + submitRequest.appProperties.putAll(userConfig) } } diff --git a/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/Spark.scala b/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/Spark.scala index 5dc31395f9..52123e7604 100644 --- a/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/Spark.scala +++ b/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/Spark.scala @@ -87,7 +87,7 @@ trait Spark extends Logger { val parameterTool = ParameterTool.fromArgs(args) val sparkSqls = { - val sql = parameterTool.get(KEY_FLINK_SQL()) + val sql = parameterTool.get(KEY_SPARK_SQL()) require(StringUtils.isNotBlank(sql), "Usage: spark sql cannot be null") Try(DeflaterUtils.unzipString(sql)) match { case Success(value) => value diff --git a/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SparkSqlValidator.scala b/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SparkSqlValidator.scala new file mode 100644 index 0000000000..dd470fde5c --- /dev/null +++ b/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SparkSqlValidator.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.spark.core.util + +import org.apache.streampark.common.enums.SparkSqlValidationFailedType +import org.apache.streampark.common.util.{ExceptionUtils, Logger} + +import scala.util.{Failure, Try} + +object SparkSqlValidator extends Logger { + + private[this] val SPARK_SQL_PARSER_CLASS = + "org.apache.spark.sql.execution.SparkSqlParser" + + private[this] val SYNTAX_ERROR_REGEXP = + ".*\\(line\\s(\\d+),\\spos\\s(\\d+)\\).*".r + + def verifySql(sql: String): SparkSqlValidationResult = { + val sqlCommands = SqlCommandParser.parseSQL(sql, r => return r) + Try { + val parserClass = Try(Class.forName(SPARK_SQL_PARSER_CLASS)).get + val parser = parserClass + .getConstructor() + .newInstance() + val method = + parser.getClass.getMethod("parsePlan", classOf[String]) + method.setAccessible(true) + for (call <- sqlCommands) { + Try { + method.invoke(parser, call.originSql) + } match { + case Failure(e) => + val exception = ExceptionUtils.stringifyException(e) + val causedBy = exception.drop(exception.indexOf("Caused by:")) + val cleanUpError = exception.replaceAll("[\r\n]", "") + if (SYNTAX_ERROR_REGEXP.findAllMatchIn(cleanUpError).nonEmpty) { + val SYNTAX_ERROR_REGEXP(line, column) = cleanUpError + val errorLine = call.lineStart + line.toInt - 1 + return SparkSqlValidationResult( + success = false, + failedType = SparkSqlValidationFailedType.SYNTAX_ERROR, + lineStart = call.lineStart, + lineEnd = call.lineEnd, + errorLine = errorLine, + errorColumn = column.toInt, + sql = call.originSql, + exception = causedBy.replaceAll(s"at\\sline\\s$line", s"at line $errorLine")) + } else { + return SparkSqlValidationResult( + success = false, + failedType = SparkSqlValidationFailedType.SYNTAX_ERROR, + lineStart = call.lineStart, + lineEnd = call.lineEnd, + sql = call.originSql, + exception = causedBy) + } + case _ => + } + } + } match { + case Failure(e) => + return SparkSqlValidationResult( + success = false, + failedType = SparkSqlValidationFailedType.CLASS_ERROR, + exception = ExceptionUtils.stringifyException(e)) + case _ => + } + SparkSqlValidationResult() + } + +} diff --git a/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SqlCommandParser.scala b/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SqlCommandParser.scala index dc1a13ff76..9a9604faae 100644 --- a/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SqlCommandParser.scala +++ b/streampark-spark/streampark-spark-core/src/main/scala/org/apache/streampark/spark/core/util/SqlCommandParser.scala @@ -18,7 +18,7 @@ package org.apache.streampark.spark.core.util import org.apache.streampark.common.conf.ConfigKeys.PARAM_PREFIX -import org.apache.streampark.common.enums.FlinkSqlValidationFailedType +import org.apache.streampark.common.enums.SparkSqlValidationFailedType import org.apache.streampark.common.util.Logger import enumeratum.EnumEntry @@ -47,7 +47,7 @@ object SqlCommandParser extends Logger { validationCallback( SparkSqlValidationResult( success = false, - failedType = FlinkSqlValidationFailedType.VERIFY_FAILED, + failedType = SparkSqlValidationFailedType.VERIFY_FAILED, exception = sqlEmptyError)) null } else { @@ -63,7 +63,7 @@ object SqlCommandParser extends Logger { validationCallback( SparkSqlValidationResult( success = false, - failedType = FlinkSqlValidationFailedType.UNSUPPORTED_SQL, + failedType = SparkSqlValidationFailedType.UNSUPPORTED_SQL, lineStart = segment.start, lineEnd = 
segment.end, exception = s"unsupported sql", @@ -80,7 +80,7 @@ object SqlCommandParser extends Logger { validationCallback( SparkSqlValidationResult( success = false, - failedType = FlinkSqlValidationFailedType.VERIFY_FAILED, + failedType = SparkSqlValidationFailedType.VERIFY_FAILED, exception = "spark sql syntax error, no executable sql")) null } else { @@ -149,211 +149,224 @@ object SqlCommand extends enumeratum.Enum[SqlCommand] { val values: immutable.IndexedSeq[SqlCommand] = findValues - // ---- SELECT Statements-------------------------------------------------------------------------------------------------------------------------------- - case object SELECT extends SqlCommand("select", "(SELECT\\s+.+)") - - // ----CREATE Statements-------------------------------------------------------------------------------------------------------------------------------- - + // ---- ALTER Statements-------------------------------------------------------------------------------------------------------------------------------- /** - *
 CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name ( {
-   * <physical_column_definition> | <metadata_column_definition> | <computed_column_definition> }[ ,
-   * ...n] [ <watermark_definition> ] [ <table_constraint> ][ , ...n] ) [COMMENT table_comment]
-   * [PARTITIONED BY (partition_column_name1, partition_column_name2, ...)] WITH (key1=val1,
-   * key2=val2, ...) [ LIKE source_table [( <like_options> )] ]
ALTER { DATABASE | SCHEMA | NAMESPACE } database_name SET { DBPROPERTIES | PROPERTIES } ( property_name = property_value [ , ... ] ) + * ALTER { DATABASE | SCHEMA | NAMESPACE } database_name SET LOCATION 'new_location' */ - case object CREATE_TABLE - extends SqlCommand("create table", "(CREATE\\s+(TEMPORARY\\s+|)TABLE\\s+.+)") + case object ALTER_DATABASE extends SqlCommand("alter database", "(ALTER\\s+(DATABASE\\s+|SCHEMA\\s+|NAMESPACE\\s+).+)") - /**
 CREATE CATALOG catalog_name WITH (key1=val1, key2=val2, ...) 
*/ - case object CREATE_CATALOG extends SqlCommand("create catalog", "(CREATE\\s+CATALOG\\s+.+)") + /** + *
 ALTER TABLE table_identifier ....
+ */ + case object ALTER_TABLE extends SqlCommand("alter table", "(ALTER\\s+TABLE\\s+.+)") /** - *
 CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name
[COMMENT database_comment]
- * WITH (key1=val1, key2=val2, ...)
+ *
 ALTER VIEW view_identifier ....
*/ - case object CREATE_DATABASE extends SqlCommand("create database", "(CREATE\\s+DATABASE\\s+.+)") + case object ALTER_VIEW extends SqlCommand("alter view", "(ALTER\\s+VIEW\\s+.+)") + + // ---- CREATE Statements-------------------------------------------------------------------------------------------------------------------------------- /** - *
 CREATE [TEMPORARY] VIEW [IF NOT EXISTS] [catalog_name.][db_name.]view_name [( columnName
-   * [, columnName ]* )] [COMMENT view_comment] AS query_expression< 
CREATE { DATABASE | SCHEMA } [ IF NOT EXISTS ] database_name
[ COMMENT database_comment ] + * [ LOCATION database_directory ]
[ WITH DBPROPERTIES ( property_name = property_value [ , ... ] ) ] */ - case object CREATE_VIEW - extends SqlCommand( - "create view", - "(CREATE\\s+(TEMPORARY\\s+|)VIEW\\s+(IF\\s+NOT\\s+EXISTS\\s+|)(\\S+)\\s+AS\\s+SELECT\\s+.+)") + case object CREATE_DATABASE extends SqlCommand("create database", "(CREATE\\s+(DATABASE\\s+|SCHEMA\\s+).+)") /** - *
 CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF NOT EXISTS]
-   * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] 
CREATE [ OR REPLACE ] [ TEMPORARY ] FUNCTION [ IF NOT EXISTS ] + * function_name AS class_name [ resource_locations ] DROP statements are used to remove a catalog with the given catalog name or to remove a - * registered table/view/function from the current or specified Catalog. + *
 CREATE TABLE [ IF NOT EXISTS ] table_identifier
+   *     [ ( col_name1 col_type1 [ COMMENT col_comment1 ], ... ) ]
+   *     USING data_source ....
+ *
 CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier
+   *     [ ( col_name1[:] col_type1 [ COMMENT col_comment1 ], ... ) ]
+   *     [ COMMENT table_comment ] ....
+ * + *
 CREATE TABLE [IF NOT EXISTS] table_identifier LIKE source_table_identifier
+   *     USING data_source ....
*/ + case object CREATE_TABLE + extends SqlCommand("create table", "(CREATE\\s+(EXTERNAL\\s+|)TABLE\\s+(IF\\s+NOT\\s+EXISTS\\s+|).+)") - /** DROP CATALOG [IF EXISTS] catalog_name */ - case object DROP_CATALOG extends SqlCommand("drop catalog", "(DROP\\s+CATALOG\\s+.+)") - - /** DROP [TEMPORARY] TABLE [IF EXISTS] [catalog_name.][db_name.]table_name */ - case object DROP_TABLE extends SqlCommand("drop table", "(DROP\\s+(TEMPORARY\\s+|)TABLE\\s+.+)") + /** + *
 CREATE [TEMPORARY] VIEW [IF NOT EXISTS] [catalog_name.][db_name.]view_name
+   * [( columnName[, columnName ]* )] [COMMENT view_comment] AS query_expression 
+ */ + case object CREATE_VIEW + extends SqlCommand( + "create view", + "(CREATE\\s+(OR\\s+REPLACE\\s+|)((GLOBAL\\s+|)TEMPORARY\\s+|)VIEW\\s+(IF\\s+NOT\\s+EXISTS\\s+|)(\\S+)\\s+AS\\s+SELECT\\s+.+)") - /** DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ] */ - case object DROP_DATABASE extends SqlCommand("drop database", "(DROP\\s+DATABASE\\s+.+)") + // ----DROP Statements-------------------------------------------------------------------------------------------------------------------------------- - /** DROP [TEMPORARY] VIEW [IF EXISTS] [catalog_name.][db_name.]view_name */ - case object DROP_VIEW extends SqlCommand("drop view", "(DROP\\s+(TEMPORARY\\s+|)VIEW\\s+.+)") + /**
 DROP { DATABASE | SCHEMA } [ IF EXISTS ] dbname [ RESTRICT | CASCADE ]
*/ + case object DROP_DATABASE extends SqlCommand("drop database", "(DROP\\s+(DATABASE\\s+|SCHEMA\\s+)(IF\\s+EXISTS\\s+|).+)") - /** - * DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] - * [catalog_name.][db_name.]function_name - */ + /**
 DROP [ TEMPORARY ] FUNCTION [ IF EXISTS ] function_name
*/ case object DROP_FUNCTION extends SqlCommand( "drop function", - "(DROP\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + "(DROP\\s+(TEMPORARY\\s+|)FUNCTION\\s+(IF\\s+EXISTS\\s+|).+)") + + /**
 DROP TABLE [ IF EXISTS ] table_identifier [ PURGE ]
*/ + case object DROP_TABLE extends SqlCommand("drop table", "(DROP\\s+TABLE\\s+(IF\\s+EXISTS\\s+|).+)") + + /**
 DROP VIEW [ IF EXISTS ] view_identifier
*/ + case object DROP_VIEW extends SqlCommand("drop view", "(DROP\\s+VIEW\\s+(IF\\s+EXISTS\\s+|).+)") - // ----ALTER Statements-------------------------------------------------------------------------------------------------------------------------------- + // ---- REPAIR Statements-------------------------------------------------------------------------------------------------------------------------------- + + /**
 [MSCK] REPAIR TABLE table_identifier [{ADD|DROP|SYNC} PARTITIONS]
*/ + case object REPAIR_TABLE extends SqlCommand("repair table", "((MSCK\\s+|)REPAIR\\s+TABLE\\s+.+)") + + // ---- TRUNCATE Statements----------------------------------------------------------------------------------------------------------------------------- + + /**
 TRUNCATE TABLE table_identifier [ partition_spec ]
*/ + case object TRUNCATE_TABLE extends SqlCommand("truncate table", "(TRUNCATE\\s+TABLE\\s+.+)") + + // ---- TRUNCATE Statements----------------------------------------------------------------------------------------------------------------------------- + + /**
 USE database_name
*/ + case object USE_DATABASE extends SqlCommand("use database", "(USE\\s+.+)") + + // ---- INSERT Statement-------------------------------------------------------------------------------------------------------------------------------- /** - * ALTER TABLE [catalog_name.][db_name.]table_name RENAME TO new_table_name - * - * ALTER TABLE [catalog_name.][db_name.]table_name SET (key1=val1, key2=val2, - * ...) + *
 INSERT [ INTO | OVERWRITE ] [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ]
+   * { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
*/ - case object ALTER_TABLE extends SqlCommand("alter table", "(ALTER\\s+TABLE\\s+.+)") + case object INSERT extends SqlCommand("insert", "(INSERT\\s+(INTO|OVERWRITE)\\s+.+)") /** - * ALTER VIEW [catalog_name.][db_name.]view_name RENAME TO new_view_name - * - * ALTER VIEW [catalog_name.][db_name.]view_name AS new_query_expression + *
 INSERT INTO [ TABLE ] table_identifier REPLACE WHERE boolean_expression query
    */
-  case object ALTER_VIEW extends SqlCommand("alter view", "(ALTER\\s+VIEW\\s+.+)")
-
-  /** ALTER DATABASE [catalog_name.]db_name SET (key1=val1, key2=val2, ...) */
-  case object ALTER_DATABASE extends SqlCommand("alter database", "(ALTER\\s+DATABASE\\s+.+)")
+  case object INSERT_REPLACE extends SqlCommand("insert replace", "(INSERT\\s+INTO\\s+(TABLE\\s+|)(\\S+)\\s+REPLACE\\s+WHERE\\s+.+)")
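
One detail worth keeping consistent across these patterns: an optional keyword needs its trailing whitespace inside the alternative, i.e. (TABLE\\s+|) rather than (TABLE|)\\s+, otherwise a statement that omits the keyword cannot match because the mandatory \\s+ has nothing left to consume. A quick check with plain java.util.regex (sample statement only):

import java.util.regex.Pattern

object OptionalKeywordCheck extends App {
  val flags = Pattern.CASE_INSENSITIVE | Pattern.DOTALL

  val broken = Pattern.compile("(INSERT\\s+INTO\\s+(TABLE|)\\s+(\\S+)\\s+REPLACE\\s+WHERE\\s+.+)", flags)
  val fixed  = Pattern.compile("(INSERT\\s+INTO\\s+(TABLE\\s+|)(\\S+)\\s+REPLACE\\s+WHERE\\s+.+)", flags)

  val sql = "INSERT INTO target_tbl REPLACE WHERE ds = '2024-01-01' SELECT * FROM src"

  println(broken.matcher(sql).matches()) // false: the \\s+ after the empty branch finds no whitespace
  println(fixed.matcher(sql).matches())  // true
}
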
 
   /**
-   *  ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS]
-   * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] 
+   * 
 INSERT OVERWRITE [ LOCAL ] DIRECTORY [ directory_path ]
+   * { spark_format | hive_format }
+   * { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
    */
-  case object ALTER_FUNCTION
-    extends SqlCommand(
-      "alter function",
-      "(ALTER\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)")
-
-  // ---- INSERT Statement--------------------------------------------------------------------------------------------------------------------------------
+  case object INSERT_OVERWRITE_DIRECTORY extends SqlCommand("insert overwrite directory", "(INSERT\\s+OVERWRITE\\s+(LOCAL\\s+|)DIRECTORY\\s+.+)")
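
How these enum entries are applied is not shown in this hunk; assuming the parser walks the SqlCommand values in order and takes the first regex that matches (the usual enumeratum findValues behaviour), classification looks roughly like the sketch below. Note that declaration order matters: the more specific INSERT OVERWRITE ... DIRECTORY pattern has to be tried before the generic INSERT one.

import java.util.regex.Pattern

object ClassifySketch extends App {
  // Hypothetical, trimmed-down command table for illustration.
  val commands: Seq[(String, String)] = Seq(
    "insert overwrite directory" -> "(INSERT\\s+OVERWRITE\\s+(LOCAL\\s+|)DIRECTORY\\s+.+)",
    "insert"                     -> "(INSERT\\s+(INTO|OVERWRITE)\\s+.+)",
    "select"                     -> "(SELECT\\s+.+)")

  def classify(stmt: String): Option[String] =
    commands.collectFirst {
      case (name, regex)
          if Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.DOTALL)
            .matcher(stmt.trim).matches() =>
        name
    }

  println(classify("INSERT OVERWRITE LOCAL DIRECTORY '/tmp/out' SELECT * FROM logs"))
  // Some(insert overwrite directory)
}
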
 
+  // ----LOAD Statements--------------------------------------------------------------------------------------------------------------------------------
   /**
-   * INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name [PARTITION part_spec]
-   * [column_list] select_statement INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name
-   * VALUES values_row [, values_row ...]
+   * 
 LOAD DATA [ LOCAL ] INPATH path [ OVERWRITE ] INTO TABLE table_identifier [ partition_spec ]
*/ - case object INSERT extends SqlCommand("insert", "(INSERT\\s+(INTO|OVERWRITE)\\s+.+)") + case object LOAD_DATE extends SqlCommand("load data", "(LOAD\\s+DATA\\s+(LOCAL|)\\s+INPATH\\s+.+)") - // ---- DESCRIBE Statement-------------------------------------------------------------------------------------------------------------------------------- - - /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ - case object DESC extends SqlCommand("desc", "(DESC\\s+.+)") + // ---- SELECT Statements-------------------------------------------------------------------------------------------------------------------------------- + case object SELECT extends SqlCommand("select", "(SELECT\\s+.+)") - /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ - case object DESCRIBE extends SqlCommand("describe", "(DESCRIBE\\s+.+)") + case object WITH_SELECT extends SqlCommand("with select", "(WITH\\s+.+)") // ---- EXPLAIN Statement-------------------------------------------------------------------------------------------------------------------------------- - case object EXPLAIN extends SqlCommand("explain", "(EXPLAIN\\s+.+)") + /**
EXPLAIN [ EXTENDED | CODEGEN | COST | FORMATTED ] statement
 */
+  case object EXPLAIN extends SqlCommand("explain", "(EXPLAIN\\s+((EXTENDED|CODEGEN|COST|FORMATTED)\\s+|).+)")
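
Downstream of this classification, the new SparkSqlValidator added earlier in this patch runs each statement through Spark's own parser via reflection and maps any syntax error back to the line in the original script. A hedged usage sketch (it needs spark-sql on the classpath, and the table names are placeholders); the result fields follow SparkSqlValidationResult as used above:

import org.apache.streampark.spark.core.util.SparkSqlValidator

object ValidateSketch extends App {
  val script =
    """CREATE TABLE IF NOT EXISTS target_tbl (id BIGINT, name STRING) USING parquet;
      |INSERT INTO target_tbl SELECT id, name FROM src_tbl;
      |""".stripMargin

  val result = SparkSqlValidator.verifySql(script)
  if (result.success) {
    println("spark sql looks valid")
  } else {
    println(s"failed: ${result.failedType} at line ${result.errorLine}: ${result.exception}")
  }
}
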
 
-  // ---- USE Statements--------------------------------------------------------------------------------------------------------------------------------
+  // ---- ADD Statement--------------------------------------------------------------------------------------------------------------------------------
+  /** ADD { FILE | FILES } resource_name [ ... ] */
+  case object ADD_FILE extends SqlCommand("add file", "(ADD\\s+(FILE|FILES)\\s+.+)")
-  /** USE CATALOG catalog_name */
-  case object USE_CATALOG extends SqlCommand("use catalog", "(USE\\s+CATALOG\\s+.+)")
+  /** ADD { JAR | JARS } file_name [ ... ] */
+  case object ADD_JAR extends SqlCommand("add jar", "(ADD\\s+(JAR|JARS)\\s+.+)")
-  /** USE MODULES module_name1[, module_name2, ...] */
-  case object USE_MODULES extends SqlCommand("use modules", "(USE\\s+MODULES\\s+.+)")
+  // ---- ANALYZE Statement--------------------------------------------------------------------------------------------------------------------------------
-  /** USE [catalog_name.]database_name */
-  case object USE_DATABASE extends SqlCommand("use database", "(USE\\s+(?!(CATALOG|MODULES)).+)")
+  /**
+   * ANALYZE TABLE table_identifier [ partition_spec ]
+   * COMPUTE STATISTICS [ NOSCAN | FOR COLUMNS col [ , ... ] | FOR ALL COLUMNS ]
+   *
+   * ANALYZE TABLES [ { FROM | IN } database_name ] COMPUTE STATISTICS [ NOSCAN ]
+   */
+  case object ANALYZE_TABLE extends SqlCommand("analyze table", "(ANALYZE\\s+(TABLES|TABLE)\\s+.+)")
-  // ----SHOW Statements--------------------------------------------------------------------------------------------------------------------------------
+  // ---- CACHE Statement--------------------------------------------------------------------------------------------------------------------------------
-  /** SHOW CATALOGS */
-  case object SHOW_CATALOGS extends SqlCommand("show catalogs", "(SHOW\\s+CATALOGS\\s*)")
+  /**
+   * CACHE [ LAZY ] TABLE table_identifier
+   * [ OPTIONS ( 'storageLevel' [ = ] value ) ] [ [ AS ] query ]
+   */
+  case object CACHE_TABLE extends SqlCommand("cache table", "(CACHE\\s+(LAZY\\s+|)TABLE\\s+.+)")
-  /** SHOW CURRENT CATALOG */
-  case object SHOW_CURRENT_CATALOG
-    extends SqlCommand("show current catalog", "(SHOW\\s+CURRENT\\s+CATALOG\\s*)")
+  /** UNCACHE TABLE [ IF EXISTS ] table_identifier */
+  case object UNCACHE_TABLE extends SqlCommand("uncache table", "(UNCACHE\\s+TABLE\\s+.+)")
-  /** SHOW DATABASES */
-  case object SHOW_DATABASES extends SqlCommand("show databases", "(SHOW\\s+DATABASES\\s*)")
+  /** CLEAR CACHE */
+  case object CLEAR_CACHE extends SqlCommand("clear cache", "(CLEAR\\s+CACHE\\s*)")
-  /** SHOW CURRENT DATABASE */
-  case object SHOW_CURRENT_DATABASE
-    extends SqlCommand("show current database", "(SHOW\\s+CURRENT\\s+DATABASE\\s*)")
+  // ---- DESCRIBE Statement--------------------------------------------------------------------------------------------------------------------------------
-  case object SHOW_TABLES extends SqlCommand("show tables", "(SHOW\\s+TABLES.*)")
+  case object DESCRIBE extends SqlCommand("describe", "((DESCRIBE|DESC)\\s+.+)")
-  case object SHOW_CREATE_TABLE
-    extends SqlCommand("show create table", "(SHOW\\s+CREATE\\s+TABLE\\s+.+)")
+  // ---- LIST Statement--------------------------------------------------------------------------------------------------------------------------------
-  case object SHOW_COLUMNS extends SqlCommand("show columns", "(SHOW\\s+COLUMNS\\s+.+)")
+  /** LIST { FILE | FILES } file_name [ ... ] */
+  case object LIST_FILE extends SqlCommand("list file", "(LIST\\s+(FILE|FILES)\\s+.+)")
-  /** SHOW VIEWS */
-  case object SHOW_VIEWS extends SqlCommand("show views", "(SHOW\\s+VIEWS\\s*)")
+  /** LIST { JAR | JARS } file_name [ ... ] */
+  case object LIST_JAR extends SqlCommand("list jar", "(LIST\\s+(JAR|JARS)\\s+.+)")
-  /** SHOW CREATE VIEW */
-  case object SHOW_CREATE_VIEW
-    extends SqlCommand("show create view", "(SHOW\\s+CREATE\\s+VIEW\\s+.+)")
+  // ---- REFRESH Statement--------------------------------------------------------------------------------------------------------------------------------
+  case object REFRESH extends SqlCommand("refresh", "(REFRESH\\s+.+)")
-  /** SHOW [USER] FUNCTIONS */
-  case object SHOW_FUNCTIONS
-    extends SqlCommand("show functions", "(SHOW\\s+(USER\\s+|)FUNCTIONS\\s*)")
+  // ----SET Statements--------------------------------------------------------------------------------------------------------------------------------
-  /** SHOW [FULL] MODULES */
-  case object SHOW_MODULES extends SqlCommand("show modules", "(SHOW\\s+(FULL\\s+|)MODULES\\s*)")
+  /**
+   * SET
+   * SET [ -v ]
+   * SET property_key[ = property_value ]
+   */
+  case object SET
+    extends SqlCommand("set", "(SET(|\\s+.+))")
-  // ----LOAD Statements--------------------------------------------------------------------------------------------------------------------------------
+  // ----RESET Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /**
+   * RESET;
+   * RESET configuration_key;
+   */
+  case object RESET extends SqlCommand("reset", "RESET\\s*(.*)?")
-  /** LOAD MODULE module_name [WITH ('key1' = 'val1', 'key2' = 'val2', ...)] */
-  case object LOAD_MODULE extends SqlCommand("load module", "(LOAD\\s+MODULE\\s+.+)")
+  // ----SHOW Statements--------------------------------------------------------------------------------------------------------------------------------
-  // ----UNLOAD Statements--------------------------------------------------------------------------------------------------------------------------------
+  /** SHOW COLUMNS table_identifier [ database ] */
+  case object SHOW_COLUMNS extends SqlCommand("show columns", "(SHOW\\s+COLUMNS\\s+.+)")
-  /** UNLOAD MODULE module_name */
-  case object UNLOAD_MODULE extends SqlCommand("unload module", "(UNLOAD\\s+MODULE\\s+.+)")
+  /** SHOW CREATE TABLE table_identifier [ AS SERDE ] */
+  case object SHOW_CREATE_TABLE extends SqlCommand("show create table", "(SHOW\\s+CREATE\\s+TABLE\\s+.+)")
-  // ----SET Statements--------------------------------------------------------------------------------------------------------------------------------
+  /** SHOW { DATABASES | SCHEMAS } [ LIKE regex_pattern ] */
+  case object SHOW_DATABASES extends SqlCommand("show databases", "(SHOW\\s+(DATABASES|SCHEMAS)\\s+.+)")
-  /** SET ('key' = 'value') */
-  case object SET
-    extends SqlCommand(
-      "set",
-      "SET(\\s+(\\S+)\\s*=(.*))?",
-      {
-        case a if a.length < 3 => None
-        case a if a.head == null => Some(Array[String](cleanUp(a.head)))
-        case a => Some(Array[String](cleanUp(a(1)), cleanUp(a(2))))
-      })
+  /** SHOW [ function_kind ] FUNCTIONS [ { FROM | IN } database_name ] [ LIKE regex_pattern ] */
+  case object SHOW_FUNCTIONS extends SqlCommand("show functions", "(SHOW\\s+(USER|SYSTEM|ALL|)\\s+FUNCTIONS\\s+.+)")
-  // ----RESET Statements--------------------------------------------------------------------------------------------------------------------------------
+  /** SHOW PARTITIONS table_identifier [ partition_spec ] */
+  case object SHOW_PARTITIONS extends SqlCommand("show partitions", "(SHOW\\s+PARTITIONS\\s+.+)")
-  /** RESET ('key') */
-  case object RESET extends SqlCommand("reset", "RESET\\s+'(.*)'")
+  /** SHOW TABLE EXTENDED [ { IN | FROM } database_name ] LIKE regex_pattern [ partition_spec ] */
+  case object SHOW_TABLE_EXTENDED extends SqlCommand("show table extended", "(SHOW\\s+TABLE\\s+EXTENDED\\s+.+)")
-  /** RESET */
-  case object RESET_ALL extends SqlCommand("reset all", "RESET", _ => Some(Array[String]("ALL")))
+  /** SHOW TABLES [ { FROM | IN } database_name ] [ LIKE regex_pattern ] */
+  case object SHOW_TABLES extends SqlCommand("show tables", "(SHOW\\s+TABLES\\s+.+)")
-  case object DELETE extends SqlCommand("delete", "(DELETE\\s+FROM\\s+.+)")
+  /** SHOW TBLPROPERTIES table_identifier [ ( unquoted_property_key | property_key_as_string_literal ) ] */
+  case object SHOW_TBLPROPERTIES extends SqlCommand("show tblproperties", "(SHOW\\s+TBLPROPERTIES\\s+.+)")
-  case object UPDATE extends SqlCommand("update", "(UPDATE\\s+.+)")
+  /** SHOW VIEWS [ { FROM | IN } database_name ] [ LIKE regex_pattern ] */
+  case object SHOW_VIEWS extends SqlCommand("show views", "(SHOW\\s+VIEWS\\s+.+)")
 
   private[this] def cleanUp(sql: String): String =
     sql.trim.replaceAll("^(['\"])|(['\"])$", "")
@@ -370,7 +383,7 @@ case class SqlCommandCall(
 
 case class SparkSqlValidationResult(
     success: JavaBool = true,
-    failedType: FlinkSqlValidationFailedType = null,
+    failedType: SparkSqlValidationFailedType = null,
     lineStart: Int = 0,
     lineEnd: Int = 0,
     errorLine: Int = 0,
diff --git a/streampark-spark/streampark-spark-sqlclient/src/main/scala/org/apache/streampark/spark/cli/SqlClient.scala b/streampark-spark/streampark-spark-sqlclient/src/main/scala/org/apache/streampark/spark/cli/SqlClient.scala
index 64705e4a26..b8075c4b1a 100644
--- a/streampark-spark/streampark-spark-sqlclient/src/main/scala/org/apache/streampark/spark/cli/SqlClient.scala
+++ b/streampark-spark/streampark-spark-sqlclient/src/main/scala/org/apache/streampark/spark/cli/SqlClient.scala
@@ -17,10 +17,10 @@
 package org.apache.streampark.spark.cli
 
-import org.apache.streampark.common.conf.ConfigKeys.KEY_FLINK_SQL
+import org.apache.streampark.common.conf.ConfigKeys.KEY_SPARK_SQL
 import org.apache.streampark.common.util.DeflaterUtils
 import org.apache.streampark.spark.core.{SparkBatch, SparkStreaming}
-import org.apache.streampark.spark.core.util.{ParameterTool, SqlCommand, SqlCommandParser}
+import org.apache.streampark.spark.core.util.ParameterTool
 
 import org.apache.commons.lang3.StringUtils
 import org.apache.spark.sql.DataFrame
@@ -35,7 +35,7 @@ object SqlClient extends App {
   private[this] val parameterTool = ParameterTool.fromArgs(args)
 
   private[this] val sparkSql = {
-    val sql = parameterTool.get(KEY_FLINK_SQL())
+    val sql = parameterTool.get(KEY_SPARK_SQL())
     require(StringUtils.isNotBlank(sql), "Usage: spark sql cannot be null")
     Try(DeflaterUtils.unzipString(sql)) match {
       case Success(value) => value
@@ -44,9 +44,6 @@ object SqlClient extends App {
     }
   }
 
-  private[this] val sets =
-    SqlCommandParser.parseSQL(sparkSql).filter(_.command == SqlCommand.SET)
-
   private[this] val defaultMode = "BATCH"
 
   private[this] val mode = defaultMode
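
A quick way to sanity-check the new Spark statement regexes outside StreamPark is to compile them with java.util.regex directly. The snippet below is an illustrative sketch only, not part of the patch: the object name SparkSqlPatternCheck is made up for the example, and the CASE_INSENSITIVE/DOTALL flags are an assumption about how SqlCommandParser compiles its patterns.

import java.util.regex.Pattern

// Standalone sketch: exercises two of the regexes introduced in this diff against sample statements.
object SparkSqlPatternCheck extends App {

  // Pattern strings copied verbatim from the ANALYZE_TABLE and SET case objects above;
  // the flags are an assumption, not something this diff shows.
  private val analyzeTable = Pattern.compile(
    "(ANALYZE\\s+(TABLES|TABLE)\\s+.+)",
    Pattern.CASE_INSENSITIVE | Pattern.DOTALL)

  private val set = Pattern.compile(
    "(SET(|\\s+.+))",
    Pattern.CASE_INSENSITIVE | Pattern.DOTALL)

  // Both statements should be accepted regardless of keyword case.
  println(analyzeTable.matcher("analyze table sales compute statistics noscan").matches()) // true
  println(set.matcher("SET spark.sql.shuffle.partitions = 200").matches()) // true
  println(set.matcher("SET").matches()) // true: unlike the old Flink pattern, a bare SET also matches
}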