Skip to content

Commit

Permalink
Rebase on 3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
gibchikafa committed Nov 15, 2023
1 parent 0226116 commit d4ad939
Show file tree
Hide file tree
Showing 13 changed files with 475 additions and 30 deletions.
32 changes: 31 additions & 1 deletion attributes/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -521,4 +521,34 @@
default['judge']['port'] = "1111"
default['judge']['home'] = "#{node['install']['dir']}/judge"
default['judge']['etc'] = "#{node['judge']['home']}/etc"
default['judge']['logs'] = "#{node['judge']['home']}/logs"
default['judge']['logs'] = "#{node['judge']['home']}/logs"


# ---------------------------------------------------------------------------
# RStudio
# ---------------------------------------------------------------------------

# Directory for RStudio under the Hopsworks directory.
default['hopsworks']['rstudio_dir'] = node['hopsworks']['dir'] + '/rstudio'

# RStudio configuration variables.
default['hopsworks']['rstudio_host']                    = 'localhost'
default['hopsworks']['rstudio_origin_scheme']           = 'https'
default['hopsworks']['rstudio_www_address']             = '0.0.0.0'
default['hopsworks']['rstudio_session_timeout_minutes'] = 360
default['hopsworks']['rstudio_logging_level']           = 'info'
default['hopsworks']['rstudio_logger_type']             = 'file'
default['hopsworks']['rstudio_log_file_max_size']       = 512
default['hopsworks']['rstudio_default_cran_repo']       = 'https://cloud.r-project.org/'

# The RStudio base directory sits under the install dir when one is
# configured, and under the Hopsworks dir otherwise.
rstudio_parent_dir = node['install']['dir'].empty? ? node['hopsworks']['dir'] : node['install']['dir']
default['rstudio']['base_dir'] = rstudio_parent_dir + '/rstudio'
default['rstudio']['shutdown_timer_interval'] = '30m'

# CRAN
# NOTE(review): this mirror and the apt URIs below use plain http - confirm
# whether https endpoints should be used instead.
default['rstudio']['cran']['mirror'] = 'http://cran.rstudio.com/'

# APT configuration for Ubuntu or Debian installs. Any other platform gets
# no apt attributes at all.
rstudio_apt_settings = {
  'ubuntu' => {
    'key'       => 'E084DAB9',
    'keyserver' => 'keyserver.ubuntu.com',
    'uri'       => 'http://cran.stat.ucla.edu/bin/linux/ubuntu'
  },
  'debian' => {
    'key'       => '381BA480',
    'keyserver' => 'subkeys.pgp.net',
    'uri'       => 'http://cran.stat.ucla.edu/bin/linux/debian'
  }
}
rstudio_apt_settings.fetch(node['platform'].downcase, {}).each do |attribute, value|
  default['rstudio']['apt'][attribute] = value
end
1 change: 1 addition & 0 deletions files/default/hopsworks_templates/config_template.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"executorCores": ${conf.executorCores?c},
"executorMemory": "${conf.executorMemory}",
"proxyUser": "${conf.hdfsUser}",
"name": "${conf.livySessionName}",
"queue": "${conf.yarnQueue}",
"conf": {
${conf.sparkConfiguration}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[*]
log-level=warn
logger-type=syslog

[@rserver]
log-level=${conf.logLevel}
logger-type=${conf.loggerType}
max-size-mb=${conf.maxSizeMb}
log-file-include-pid=${conf.includePid}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
rsession-which-r=${conf.versionPath}
www-address=${conf.ipAddress}
www-port=${conf.port}
www-root-path=${conf.rootPath}
server-user=${conf.serverUser}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
session-timeout-minutes=${conf.sessionTimeoutMinutes}
r-cran-repos=${conf.cranRepo}
10 changes: 10 additions & 0 deletions files/default/hopsworks_templates/sparklyr_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
default:
livy.driverCores: ${conf.driverCores}
livy.driverMemory: "${conf.driverMemory}"
livy.numExecutors: ${conf.numExecutors}
livy.executorCores: ${conf.executorCores}
livy.executorMemory: "${conf.executorMemory}"
livy.proxyUser: "${conf.proxyUser}"
livy.queue: "${conf.yarnQueue}"
livy.name: "${conf.livyAppName}"
${conf.sparkConfiguration}
43 changes: 16 additions & 27 deletions files/default/sql/ddl/3.4.0__initial_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1084,17 +1084,21 @@ CREATE TABLE `rstudio_interpreter` (
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `rstudio_project` (
`port` int(11) NOT NULL,
`port` int NOT NULL,
`hdfs_user_id` int NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL,
`token` varchar(255) COLLATE latin1_general_cs NOT NULL,
`secret` varchar(64) COLLATE latin1_general_cs NOT NULL,
`pid` bigint(20) NOT NULL,
`project_id` int(11) NOT NULL,
`secret` varchar(64) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
`pid` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
`project_id` int NOT NULL,
`login_password` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
`login_username` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`port`),
KEY `hdfs_user_idx` (`hdfs_user_id`),
KEY `project_id` (`project_id`),
CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE,
CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand All @@ -1107,32 +1111,17 @@ CREATE TABLE `rstudio_project` (
CREATE TABLE `rstudio_settings` (
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`num_tf_ps` int(11) DEFAULT '1',
`num_tf_gpus` int(11) DEFAULT '0',
`num_mpi_np` int(11) DEFAULT '1',
`appmaster_cores` int(11) DEFAULT '1',
`appmaster_memory` int(11) DEFAULT '1024',
`num_executors` int(11) DEFAULT '1',
`num_executor_cores` int(11) DEFAULT '1',
`executor_memory` int(11) DEFAULT '1024',
`dynamic_initial_executors` int(11) DEFAULT '1',
`dynamic_min_executors` int(11) DEFAULT '1',
`dynamic_max_executors` int(11) DEFAULT '1',
`secret` varchar(255) COLLATE latin1_general_cs NOT NULL,
`log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO',
`mode` varchar(32) COLLATE latin1_general_cs NOT NULL,
`umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022',
`advanced` tinyint(1) DEFAULT '0',
`archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '',
`shutdown_level` int(11) NOT NULL DEFAULT '6',
`base_dir` varchar(255) COLLATE latin1_general_cs DEFAULT NULL,
`job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL,
`docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
KEY `secret_idx` (`secret`),
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO
ACTION,
CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;
Expand Down
38 changes: 37 additions & 1 deletion recipes/install.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,14 @@
action :create
end

# Create the RStudio base directory with mode 770, owned by the YARN
# application user and the hops group.
directory node['rstudio']['base_dir'] do
  action :create
  owner  node['hops']['yarnapp']['user']
  group  node['hops']['group']
  mode   '770'
end

directory node['hopsworks']['dir'] do
owner node['hopsworks']['user']
group node['hopsworks']['group']
Expand Down Expand Up @@ -641,6 +649,23 @@
not_if { node['install']['kubernetes'].casecmp("true") == 0 }
end

# Sudoers entry for rstudio.sh (rendered from rstudio.sh.erb) for the
# Glassfish user.
# NOTE(review): unlike the rstudio-project-cleanup entry, this one has no
# Kubernetes guard - confirm that is intended.
kagent_sudoers 'rstudio' do
  script_name 'rstudio.sh'
  template    'rstudio.sh.erb'
  user        node['glassfish']['user']
  group       'root'
  run_as      'ALL' # run this as root - inside we change to different users
end

# Sudoers entry for rstudio-project-cleanup.sh for the Glassfish user.
# Skipped when the install is configured for Kubernetes.
kagent_sudoers 'rstudio-project-cleanup' do
  script_name 'rstudio-project-cleanup.sh'
  template    'rstudio-project-cleanup.sh.erb'
  user        node['glassfish']['user']
  group       'root'
  run_as      'ALL'
  not_if { node['install']['kubernetes'].casecmp('true').zero? }
end

kagent_sudoers "convert-ipython-notebook" do
user node['glassfish']['user']
group "root"
Expand Down Expand Up @@ -739,7 +764,7 @@
end

["tensorboard-launch.sh", "tensorboard-cleanup.sh", "condasearch.sh", "list_environment.sh", "jupyter-kill.sh",
"tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh", "git-container-kill.sh"].each do |script|
"tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh", "git-container-kill.sh", "rstudio-kill.sh"].each do |script|
template "#{theDomain}/bin/#{script}" do
source "#{script}.erb"
owner node['glassfish']['user']
Expand All @@ -760,6 +785,17 @@
})
end

# Render the RStudio launch script into the domain's bin directory.
# Mode 500: read and execute for the owner (the Glassfish user) only.
template "#{theDomain}/bin/rstudio-launch.sh" do
  source 'rstudio-launch.sh.erb'
  # The launch script template reads the namenode FQDN.
  variables(namenode_fdqn: namenode_fdqn)
  owner  node['glassfish']['user']
  group  node['glassfish']['group']
  mode   '500'
  action :create
end

template "#{theDomain}/bin/git-container-launch.sh" do
source "git-container-launch.sh.erb"
owner node['glassfish']['user']
Expand Down
26 changes: 26 additions & 0 deletions templates/default/rstudio-kill.sh.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# Force-removes the Docker container of an RStudio session.
#
# Arguments:
#   $1  CONTAINER_ID       container id; only used when $2 is the empty string
#   $2  PROJECT_USER_NAME  when non-empty, the container is addressed by the
#                          name "<PROJECT_USER_NAME>__rstudio" instead of by id
#
# Exit status: 1 when the argument count is wrong, otherwise the exit status
# of `docker rm -f`.

help() {
  printf '\nusage: %s CONTAINER_ID PROJECT_USER_NAME\n\n' "$0"
  exit 1
}

[ $# -eq 2 ] || help

if [ -n "$2" ]; then
  target="${2}__rstudio"
  echo "Killing input container_name: $target"
else
  target=$1
  echo "Killing input container_id: $target"
fi

# docker's own output is discarded; only the exit status is reported.
docker rm -f "$target" > /dev/null 2>&1
exit $?
130 changes: 130 additions & 0 deletions templates/default/rstudio-launch.sh.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env bash
#
# Starts an RStudio server inside a Docker container.
# Takes 15 positional arguments; see help() for their order.

# Upper bound on the number of polls of the start-up wait loop.
WAIT_START=60
# Status returned by kill_named when there is no container to remove.
NOT_FOUND=127

# --- positional arguments -------------------------------------------------
RSTUDIO_HOME="$1"
HADOOP_HOME="$2"
HADOOP_USERNAME="$3"
PORT="$4"
SECRET_DIR="$5"
CERTS_DIR="$6"
IMAGE="$7"
LOGFILE="${RSTUDIO_HOME}/logs/$8"    # $8 is a file name below ${RSTUDIO_HOME}/logs
PROJECT_NAME="$9"
LIVY_IP="${10}"
LIVY_PORT="${11}"
HADOOP_BASE_DIR="${12}"
SERVER_PASSWORD="${13}"
HADOOP_VERSION="${14}"
SPARK_VERSION="${15}"

# --- values derived from the arguments -------------------------------------
HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
CONTAINER_NAME="${HADOOP_USERNAME}__rstudio"
PID_FILE="${RSTUDIO_HOME}/run/rstudio.pid"    # used as docker's --cidfile
SPARKLYR_CONFIG_FILE="${RSTUDIO_HOME}/conf/config.yml"
CLIENT_CERTIFICATES_BUNDLE="$CERTS_DIR/certificate_bundle.pem"
ROOT_CA_BUNDLE="$CERTS_DIR/root_ca.pem"
CLIENT_KEY="$CERTS_DIR/private_key.pem"

# --- fixed settings ---------------------------------------------------------
HADOOP_CLIENT_ENV_OPTS='-D fs.permissions.umask-mode=0002'
SPARK_CONF_DIR=/srv/hops/spark/conf
FLINK_CONF_DIR=/srv/hops/flink/conf

# --- filled in when the template is rendered --------------------------------
NAMENODE_IP="<%= @namenode_fdqn %>"
NAMENODE_PORT="<%= node['hops']['nn']['port'] %>"


# Prints the usage line (surrounded by blank lines) and exits with status 1.
help() {
  printf '\nusage: %s RSTUDIO_HOME HADOOP_HOME HADOOP_USERNAME PORT SECRET_DIR CERTS_DIR IMAGE LOGFILE PROJECT_NAME LIVY_IP LIVY_PORT HADOOP_BASE_DIR SERVER_PASSWORD HADOOP_VERSION SPARK_VERSION\n\n' "$0"
  exit 1
}

# Force-removes the container named exactly $CONTAINER_NAME, if it exists.
#
# The container is looked up by its exact name rather than by grepping the
# container list: a substring match could select another user's container
# whose name merely contains $CONTAINER_NAME, and several matches would yield
# a multi-line id that `docker rm` cannot handle.
#
# Returns: the exit status of `docker rm -f`, or $NOT_FOUND when no container
# with that name exists.
function kill_named {
  if ! docker container inspect "$CONTAINER_NAME" > /dev/null 2>&1 ; then
    return "$NOT_FOUND"
  fi
  docker rm -f "$CONTAINER_NAME" > /dev/null 2>&1
  return $?
}

if [ $# -ne 15 ]; then
  help
fi

# Both directories must already exist; abort with cd's status otherwise.
cd "$RSTUDIO_HOME" || exit
cd "$SECRET_DIR" || exit

# Remove a left-over container with the same name; its status is irrelevant.
kill_named

# Clear a stale container-id file before docker writes a new one via --cidfile.
if [ -f "$PID_FILE" ] ; then
  rm "$PID_FILE"
fi

if ! touch "$LOGFILE" ; then
  echo "Error: could not create the log file for rstudio server"
  exit 1
fi
chmod 766 "$LOGFILE"

# Start the container in the background; the wait loop below polls its logs.
# NOTE(review): SERVER_PASSWORD is handed over as a plain environment variable
# on the command line - confirm this exposure is acceptable.
docker run --rm -d --cap-add SYS_ADMIN --device /dev/fuse --security-opt apparmor:unconfined \
  --name "$CONTAINER_NAME" --cidfile="$PID_FILE" \
  --network=host \
  --init \
  -e "RSTUDIO_PATH=$RSTUDIO_HOME" \
  -e "RSTUDIO_DATA_DIR=$RSTUDIO_HOME" \
  -e "PDIR=$SECRET_DIR" \
  -e "RSTUDIO_CONFIG_DIR=${RSTUDIO_HOME}/conf" \
  -e "RSTUDIO_RUNTIME_DIR=${RSTUDIO_HOME}/run" \
  -e "HADOOP_HDFS_HOME=${HADOOP_HOME}" \
  -e "HADOOP_CONF_DIR=${HADOOP_CONF_DIR}" \
  -e "HADOOP_CLIENT_OPTS='-Dfs.permissions.umask-mode=0002'" \
  -e "MATERIAL_DIRECTORY=$CERTS_DIR" \
  -e "HADOOP_USERNAME=$HADOOP_USERNAME" \
  -e "HADOOP_HOME=${HADOOP_HOME}" \
  -e "LOGFILE=${LOGFILE}" \
  -e "RSTUDIO_PORT=${PORT}" \
  -e "PROJECT_NAME=${PROJECT_NAME}" \
  -e "LIVY_IP=${LIVY_IP}" \
  -e "LIVY_PORT=${LIVY_PORT}" \
  -e "HADOOP_BASE_DIR=${HADOOP_BASE_DIR}" \
  -e "HADOOP_CLIENT_ENV_OPTS=${HADOOP_CLIENT_ENV_OPTS}" \
  -e "SPARKLYR_CONFIG_FILE=${SPARKLYR_CONFIG_FILE}" \
  -e "SERVER_PASSWORD=${SERVER_PASSWORD}" \
  -e "HADOOP_VERSION=${HADOOP_VERSION}" \
  -e "NAMENODE_IP=${NAMENODE_IP}" \
  -e "NAMENODE_PORT=${NAMENODE_PORT}" \
  -e "SPARK_VERSION=${SPARK_VERSION}" \
  -e "CLIENT_CERTIFICATES_BUNDLE=${CLIENT_CERTIFICATES_BUNDLE}" \
  -e "ROOT_CA_BUNDLE=${ROOT_CA_BUNDLE}" \
  -e "CLIENT_KEY=${CLIENT_KEY}" \
  -v "$RSTUDIO_HOME:$RSTUDIO_HOME:rw" \
  -v "$SECRET_DIR:$SECRET_DIR:rw" \
  -v "${HADOOP_CONF_DIR}:${HADOOP_CONF_DIR}:ro" \
  -v "${SPARK_CONF_DIR}:${SPARK_CONF_DIR}:ro" \
  -v "$LOGFILE:/var/log/rstudio-server/rserver.log:rw" \
  -u="yarnapp" \
  -w="$SECRET_DIR" \
  "$IMAGE" &

# Wait for rstudio to start: poll the container log once per second, at most
# WAIT_START times, until a line matching "...done" shows up.
timeout=0
while [ "$timeout" -lt "$WAIT_START" ] ; do
  sleep 1
  # The cidfile only exists once docker has created the container.
  if [ -s "$PID_FILE" ] && docker logs "$(cat "$PID_FILE")" | grep "...done" ; then
    break
  fi
  echo -n "."
  timeout=$((timeout + 1))
done
echo ""

# If the timeout was exceeded, kill rstudio and report the failure to the
# caller instead of passing on the (usually successful) status of the kill.
if [ "$timeout" -eq "$WAIT_START" ] ; then
  echo "Error: rstudio server did not start within ${WAIT_START} seconds" >&2
  kill_named
  exit 1
fi

exit 0
Loading

0 comments on commit d4ad939

Please sign in to comment.