diff --git a/modules/slurm-cluster/main.tf b/modules/slurm-cluster/main.tf new file mode 100644 index 000000000..3093b030b --- /dev/null +++ b/modules/slurm-cluster/main.tf @@ -0,0 +1,53 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +locals { + wl_templates = [ + for f in fileset(local.wl_templates_path, "[0-9]*yml") : + "${local.wl_templates_path}/${f}" + ] + wl_templates_path = ( + var.templates_path == null + ? "${path.module}/manifest-templates" + : pathexpand(var.templates_path) + ) +} + +resource "kubernetes_namespace" "default" { + count = var.namespace_create ? 1 : 0 + metadata { + name = var.namespace + } +} + +resource "kubernetes_manifest" "default" { + for_each = toset(local.wl_templates) + manifest = yamldecode(templatefile(each.value, { + namespace = var.namespace + cluster_config = var.cluster_config + })) + + # Ensure the namespace exists before manifests are applied when namespace_create is true. + depends_on = [kubernetes_namespace.default] + + timeouts { + create = "30m" + } + field_manager { + force_conflicts = true + } +} diff --git a/modules/slurm-cluster/manifest-templates/00-configmap-slurm-config.yml b/modules/slurm-cluster/manifest-templates/00-configmap-slurm-config.yml new file mode 100644 index 000000000..c27cde51c --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/00-configmap-slurm-config.yml @@ -0,0 +1,123 @@ +# +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +apiVersion: v1 +kind: ConfigMap +metadata: + name: slurm-conf-configmap + namespace: ${namespace} +data: + slurm.conf: | + # slurm.conf + # + # See the slurm.conf man page for more information. + # + ClusterName=linux + SlurmctldHost=slurmctld-0 + # + SlurmUser=slurm + SlurmctldPort=6820-6830 + SlurmdPort=6818 + AuthType=auth/munge + StateSaveLocation=/var/spool/slurmctld + SlurmdSpoolDir=/var/spool/slurmd + SwitchType=switch/none + MpiDefault=pmix + SlurmctldPidFile=/var/run/slurmd/slurmctld.pid + SlurmdPidFile=/var/run/slurmd/slurmd.pid + ProctrackType=proctrack/linuxproc + ReturnToService=2 + # + # TIMERS + SlurmctldTimeout=300 + SlurmdTimeout=30 + InactiveLimit=0 + MinJobAge=300 + KillWait=30 + Waittime=0 + # + # SCHEDULING + SchedulerType=sched/backfill + SelectType=select/cons_tres + SelectTypeParameters=CR_CPU_Memory + # + # LOGGING + SlurmctldDebug=3 + SlurmctldLogFile=/var/log/slurm/slurmctld.log + SlurmdDebug=3 + SlurmdLogFile=/var/log/slurm/slurmd.log + JobCompType=jobcomp/filetxt + JobCompLoc=/var/log/slurm/jobcomp.log + # + # ACCOUNTING + JobAcctGatherType=jobacct_gather/linux + JobAcctGatherFrequency=30 + # + AccountingStorageType=accounting_storage/slurmdbd + AccountingStorageHost=slurmdbd + AccountingStoragePort=6819 + # + SlurmctldParameters=cloud_reg_addrs + + # CLOUD CONFIGURATIONS + MaxNodeCount=64000 + include cloud.conf + cloud.conf: | + PrivateData=cloud + SlurmctldParameters=enable_configless + ## GRES + GresTypes=gpu + AccountingStorageTRES=gres/gpu + DebugFlags=Gres + TreeWidth=128 + + # NODES + NodeName=DEFAULT State=UNKNOWN 
RealMemory=15000 CPUs=4 CoresPerSocket=2 ThreadsPerCore=2 Gres=gpu:1 + NodeName=slurmd-[0-39] State=CLOUD Gres=gpu:1 + NodeSet=slurmdnodeset Nodes=slurmd-[0-39] + + NodeName=DEFAULT State=UNKNOWN RealMemory=30000 CPUs=8 CoresPerSocket=2 ThreadsPerCore=2 Gres=gpu:2 + NodeName=slurmd1-[0-39] State=CLOUD Gres=gpu:2 + NodeSet=slurmd1nodeset Nodes=slurmd1-[0-39] + + # PARTITIONS + PartitionName=all Default=yes Nodes=ALL MaxTime=INFINITE State=UP + + PropagateResourceLimitsExcept=MEMLOCK + + PartitionName=1gpunodes Nodes=slurmdnodeset State=UP DefMemPerCPU=7007 SuspendTime=300 Oversubscribe=Exclusive PowerDownOnIdle=YES ResumeTimeout=300 SuspendTimeout=120 + PartitionName=2gpunodes Nodes=slurmd1nodeset State=UP DefMemPerCPU=7007 SuspendTime=300 Oversubscribe=Exclusive PowerDownOnIdle=YES ResumeTimeout=300 SuspendTimeout=120 + + cloud_gres.conf: | + NodeName=slurmd-[0-39] Name=gpu File=/dev/nvidia0 + NodeName=slurmd1-[0-39] Name=gpu File=/dev/nvidia[0-1] + gres.conf: | + NodeName=slurmd-[0-39] Name=gpu File=/dev/nvidia0 + NodeName=slurmd1-[0-39] Name=gpu File=/dev/nvidia[0-1] + cgroup.conf: | + ### + # + # Slurm cgroup support configuration file + # + # See man slurm.conf and man cgroup.conf for further + # information on cgroup configuration parameters + #-- + ConstrainCores=yes + ConstrainDevices=yes + ConstrainRAMSpace=yes + ConstrainSwapSpace=yes + IgnoreSystemd=yes + \ No newline at end of file diff --git a/modules/slurm-cluster/manifest-templates/00-configmap-slurmdb-config.yml b/modules/slurm-cluster/manifest-templates/00-configmap-slurmdb-config.yml new file mode 100644 index 000000000..9460060e7 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/00-configmap-slurmdb-config.yml @@ -0,0 +1,49 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, 
including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +apiVersion: v1 +kind: ConfigMap +metadata: + name: slurmdbd-conf-configmap + namespace: ${namespace} +data: + slurmdbd.conf: | + # + # Example slurmdbd.conf file. + # + # See the slurmdbd.conf man page for more information. 
+ # + # Authentication info + AuthType=auth/munge + # + # slurmDBD info + DbdAddr=slurmdbd + DbdHost=slurmdbd + SlurmUser=slurm + DebugLevel=4 + LogFile=/var/log/slurm/slurmdbd.log + PidFile=/var/run/slurmdbd/slurmdbd.pid + # + # Database info + StorageType=accounting_storage/mysql + StorageHost=${cluster_config.database.host} + StorageUser=${cluster_config.database.user} diff --git a/modules/slurm-cluster/manifest-templates/00-secret-database-auth.yml b/modules/slurm-cluster/manifest-templates/00-secret-database-auth.yml new file mode 100644 index 000000000..2379b803e --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/00-secret-database-auth.yml @@ -0,0 +1,29 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: Secret +metadata: + name: database-auth-secret + namespace: ${namespace} +data: + password: ${base64encode(cluster_config.database.password)} diff --git a/modules/slurm-cluster/manifest-templates/00-secret-munge-key.yml b/modules/slurm-cluster/manifest-templates/00-secret-munge-key.yml new file mode 100644 index 000000000..dbea49a61 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/00-secret-munge-key.yml @@ -0,0 +1,29 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: Secret +metadata: + name: munge-key-secret + namespace: ${namespace} +data: + munge.key: ${base64encode(cluster_config.munge.key)} diff --git a/modules/slurm-cluster/manifest-templates/01-pvc-slurm-shared-storage.yml b/modules/slurm-cluster/manifest-templates/01-pvc-slurm-shared-storage.yml new file mode 100644 index 000000000..e77ed2bf1 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/01-pvc-slurm-shared-storage.yml @@ -0,0 +1,34 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: slurm-shared-storage + namespace: ${namespace} +spec: + storageClassName: standard-rwx + accessModes: + - ReadWriteMany + resources: + requests: + storage: ${cluster_config.storage.size_gb}Gi diff --git a/modules/slurm-cluster/manifest-templates/01-pvc-var-lib-mysql.yml b/modules/slurm-cluster/manifest-templates/01-pvc-var-lib-mysql.yml new file mode 100644 index 000000000..432a6cc71 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/01-pvc-var-lib-mysql.yml @@ -0,0 +1,36 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: mysql + name: var-lib-mysql + namespace: ${namespace} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: ${cluster_config.database.storage_size_gb}Gi diff --git a/modules/slurm-cluster/manifest-templates/01-pvc-var-spool-slurmctld.yml b/modules/slurm-cluster/manifest-templates/01-pvc-var-spool-slurmctld.yml new file mode 100644 index 000000000..71d099fbb --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/01-pvc-var-spool-slurmctld.yml @@ -0,0 +1,36 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmctld + name: var-spool-slurmctld + namespace: ${namespace} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/modules/slurm-cluster/manifest-templates/02-svc-deployment-login.yml b/modules/slurm-cluster/manifest-templates/02-svc-deployment-login.yml new file mode 100644 index 000000000..f16d5b8dd --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/02-svc-deployment-login.yml @@ -0,0 +1,86 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: login + name: login + namespace: ${namespace} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: login + strategy: + type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: login + spec: + containers: + - args: + - login + image: ${cluster_config.image} + name: login + ports: + - containerPort: 22 + volumeMounts: + - mountPath: ${cluster_config.storage.mount_path} + name: slurm-jobdir + - mountPath: /etc/slurm/ + name: slurm-config-volume + - mountPath: /tmp/munge.key + name: munge-key-secret + subPath: munge.key + hostname: login + dnsPolicy: "None" + dnsConfig: + nameservers: + - "169.254.169.254" + searches: + - slurmd.${namespace}.svc.cluster.local + - slurmd1.${namespace}.svc.cluster.local + - slurmd2.${namespace}.svc.cluster.local + - svc.cluster.local + - cluster.local + - ${namespace}.svc.cluster.local + options: + - name: ndots + value: "5" + restartPolicy: Always + volumes: + - name: slurm-jobdir + persistentVolumeClaim: + claimName: slurm-shared-storage + - name: slurm-config-volume + configMap: + name: slurm-conf-configmap + - name: munge-key-secret + secret: + secretName: munge-key-secret + defaultMode: 0400 diff --git a/modules/slurm-cluster/manifest-templates/02-svc-deployment-mysql.yml b/modules/slurm-cluster/manifest-templates/02-svc-deployment-mysql.yml new file mode 100644 index 000000000..aec6db619 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/02-svc-deployment-mysql.yml @@ -0,0 +1,70 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: mysql + name: mysql + namespace: ${namespace} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: mysql + strategy: + type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: mysql + spec: + containers: + - env: + - name: MYSQL_DATABASE + value: slurm_acct_db + - name: MYSQL_PASSWORD + valueFrom: + secretKeyRef: + name: database-auth-secret + key: password + - name: MYSQL_RANDOM_ROOT_PASSWORD + value: "yes" + - name: MYSQL_USER + value: "slurm" + image: mariadb:10.10 + name: mysql + ports: + - containerPort: 3306 + volumeMounts: + - mountPath: /var/lib/mysql + name: var-lib-mysql + hostname: mysql + restartPolicy: Always + volumes: + - name: var-lib-mysql + persistentVolumeClaim: + claimName: var-lib-mysql diff --git a/modules/slurm-cluster/manifest-templates/02-svc-deployment-slurmdbd.yml b/modules/slurm-cluster/manifest-templates/02-svc-deployment-slurmdbd.yml new file mode 100644 index 000000000..8c470c468 --- /dev/null +++ 
b/modules/slurm-cluster/manifest-templates/02-svc-deployment-slurmdbd.yml @@ -0,0 +1,95 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmdbd + name: slurmdbd + namespace: ${namespace} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmdbd + strategy: + type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmdbd + spec: + containers: + - args: + - slurmdbd + - -vvv + image: ${cluster_config.image} + name: slurmdbd + ports: + - containerPort: 6819 + volumeMounts: + - mountPath: /etc/slurm/slurm.conf + name: slurm-config-volume + subPath: slurm.conf + - mountPath: /tmp/munge.key + name: munge-key-secret + subPath: munge.key + - mountPath: /tmp/slurmdbd.conf + name: dbd-config-volume + subPath: slurmdbd.conf + env: + - name: StoragePass + valueFrom: + secretKeyRef: + name: database-auth-secret + key: password + dnsPolicy: "None" + dnsConfig: + nameservers: + - "169.254.169.254" + searches: + - slurmd.${namespace}.svc.cluster.local + - slurmd1.${namespace}.svc.cluster.local + - slurmd2.${namespace}.svc.cluster.local + - svc.cluster.local + - cluster.local + - ${namespace}.svc.cluster.local + options: + - name: ndots + value: "5" + hostname: slurmdbd + restartPolicy: Always + volumes: + - name: dbd-config-volume + configMap: + name: slurmdbd-conf-configmap + - name: slurm-config-volume + configMap: + name: slurm-conf-configmap + - name: munge-key-secret + secret: + secretName: munge-key-secret + defaultMode: 0400 diff --git a/modules/slurm-cluster/manifest-templates/03-svc-statefulset-slurmctld.yml b/modules/slurm-cluster/manifest-templates/03-svc-statefulset-slurmctld.yml new file mode 100644 index 000000000..d3ca95397 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/03-svc-statefulset-slurmctld.yml @@ -0,0 +1,103 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person 
obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmctld + name: slurmctld + namespace: ${namespace} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmctld + template: + metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmctld + spec: + containers: + - args: + - slurmctld + - -vvv + - -i + - -D + image: ${cluster_config.image} + name: slurmctld + ports: + - containerPort: 6817 + - containerPort: 6820 + - containerPort: 6821 + - containerPort: 6822 + - containerPort: 6823 + - containerPort: 6824 + - containerPort: 6825 + - containerPort: 6826 + - containerPort: 6827 + - containerPort: 6828 + - containerPort: 6829 + - containerPort: 6830 + resources: {} + volumeMounts: + - mountPath: ${cluster_config.storage.mount_path} + name: slurm-jobdir + - mountPath: /etc/slurm/ + name: slurm-config-volume + - mountPath: /tmp/munge.key + name: munge-key-secret + subPath: munge.key + - mountPath: /var/spool/slurmctld + name: slurmctld-state + dnsPolicy: "None" + dnsConfig: + nameservers: + - "169.254.169.254" + searches: + - slurmd.${namespace}.svc.cluster.local + - slurmd1.${namespace}.svc.cluster.local + - slurmd2.${namespace}.svc.cluster.local + - svc.cluster.local + - cluster.local + - ${namespace}.svc.cluster.local + options: + - name: ndots + value: "5" + restartPolicy: Always + volumes: + - name: slurm-jobdir + persistentVolumeClaim: + claimName: slurm-shared-storage + - name: slurmctld-state + persistentVolumeClaim: + claimName: var-spool-slurmctld + - name: slurm-config-volume + configMap: + name: slurm-conf-configmap + - name: munge-key-secret + secret: + secretName: munge-key-secret + defaultMode: 0400 diff --git a/modules/slurm-cluster/manifest-templates/04-svc-mysql.yml b/modules/slurm-cluster/manifest-templates/04-svc-mysql.yml new file mode 100644 index 000000000..8b1816cbf --- /dev/null 
+++ b/modules/slurm-cluster/manifest-templates/04-svc-mysql.yml @@ -0,0 +1,38 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: mysql + name: mysql + namespace: ${namespace} +spec: + ports: + - name: mysql + port: 3306 + targetPort: 3306 + selector: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: mysql diff --git a/modules/slurm-cluster/manifest-templates/04-svc-slurmctld-0.yml b/modules/slurm-cluster/manifest-templates/04-svc-slurmctld-0.yml new file mode 100644 index 000000000..25222b256 --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/04-svc-slurmctld-0.yml @@ -0,0 +1,71 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmctld + name: slurmctld-0 + namespace: ${namespace} +spec: + ports: + - name: slurmctld + port: 6817 + targetPort: 6817 + - name: slurmctld-20 + port: 6820 + targetPort: 6820 + - name: slurmctld-21 + port: 6821 + targetPort: 6821 + - name: slurmctld-22 + port: 6822 + targetPort: 6822 + - name: slurmctld-23 + port: 6823 + targetPort: 6823 + - name: slurmctld-24 + port: 6824 + targetPort: 6824 + - name: slurmctld-25 + port: 6825 + targetPort: 6825 + - name: slurmctld-26 + port: 6826 + targetPort: 6826 + - name: slurmctld-27 + port: 6827 + targetPort: 6827 + - name: slurmctld-28 + port: 6828 + targetPort: 6828 + - name: slurmctld-29 + port: 6829 + targetPort: 6829 + - name: slurmctld-30 + port: 6830 + targetPort: 6830 + selector: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmctld diff --git a/modules/slurm-cluster/manifest-templates/04-svc-slurmdb.yml b/modules/slurm-cluster/manifest-templates/04-svc-slurmdb.yml new file mode 100644 index 000000000..0ebb34cef --- /dev/null +++ b/modules/slurm-cluster/manifest-templates/04-svc-slurmdb.yml @@ -0,0 +1,38 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmdbd + name: slurmdbd + namespace: ${namespace} +spec: + ports: + - name: slurmdbd + port: 6819 + targetPort: 6819 + selector: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmdbd diff --git a/modules/slurm-cluster/outputs.tf b/modules/slurm-cluster/outputs.tf new file mode 100644 index 000000000..92785d1de --- /dev/null +++ b/modules/slurm-cluster/outputs.tf @@ -0,0 +1,25 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +output "namespace" { + description = "Slurm cluster namespace" + value = var.namespace +} + +output "image" { + description = "Slurm cluster image" + value = var.cluster_config.image +} diff --git a/modules/slurm-cluster/variables.tf b/modules/slurm-cluster/variables.tf new file mode 100644 index 000000000..41ab625a3 --- /dev/null +++ b/modules/slurm-cluster/variables.tf @@ -0,0 +1,59 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "namespace" { + description = "Namespace used for Slurm cluster resources." + type = string + nullable = false + default = "default" +} + +variable "namespace_create" { + description = "Create namespace use for Slurm cluster resources." + type = bool + nullable = false + default = false +} + +variable "cluster_config" { + description = "Configure Slurm cluster statefulset parameters." 
+ type = object({ + name = optional(string, "linux") + image = optional(string, "") + database = object({ + create = optional(bool, true) + host = optional(string, "mysql") + user = optional(string, "slurm") + password = optional(string, "") + storage_size_gb = optional(number, 1) + }) + munge = object({ + key = optional(string, "") + }) + storage = optional(object({ + mount_path = optional(string, "/home") + type = optional(string, "filestore") + size_gb = optional(number, 100) + })) + }) + nullable = false +} + +variable "templates_path" { + description = "Path where manifest templates will be read from. Set to null to use the default manifests" + type = string + default = null +} diff --git a/modules/slurm-nodeset/main.tf b/modules/slurm-nodeset/main.tf new file mode 100644 index 000000000..6235dacea --- /dev/null +++ b/modules/slurm-nodeset/main.tf @@ -0,0 +1,59 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +locals { + wl_templates = [ + for f in fileset(local.wl_templates_path, "[0-9]*yml") : + "${local.wl_templates_path}/${f}" + ] + wl_templates_path = ( + var.templates_path == null + ? "${path.module}/manifest-templates" + : pathexpand(var.templates_path) + ) + + config = ( + strcontains(var.config.type, "n1-standard") + ? 
{ + gpu_instances = var.config.accelerator.count + gpu = var.config.accelerator.type + } + : { + gpu_instances = var.accelerator_types[var.config.type].count + gpu = var.accelerator_types[var.config.type].type + } + ) +} + +resource "kubernetes_manifest" "default" { + for_each = toset(local.wl_templates) + manifest = yamldecode(templatefile(each.value, { + name = var.name + namespace = var.config.namespace + image = var.config.image + instances = var.config.instances + config = local.config + })) + + timeouts { + create = "30m" + } + field_manager { + force_conflicts = true + } +} + diff --git a/modules/slurm-nodeset/manifest-templates/03-svc-statefulset-slurmd.yml b/modules/slurm-nodeset/manifest-templates/03-svc-statefulset-slurmd.yml new file mode 100644 index 000000000..09c8fb83f --- /dev/null +++ b/modules/slurm-nodeset/manifest-templates/03-svc-statefulset-slurmd.yml @@ -0,0 +1,124 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmd + name: ${name} + namespace: ${namespace} +spec: + replicas: ${instances} + selector: + matchLabels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmd + serviceName: ${name} + template: + metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmd + spec: + nodeSelector: + cloud.google.com/gke-accelerator: ${config.gpu} + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + containers: + - args: + - slurmd + - -D + - -s + - -vvv + - --conf-server="slurmctld-0:6820-6830" + - -Z + - -N + - "$(POD_NAME).${name}" + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + image: ${image} + imagePullPolicy: Always + name: slurmd + readinessProbe: + tcpSocket: + port: 6818 + failureThreshold: 3 + initialDelaySeconds: 1 + periodSeconds: 20 + successThreshold: 1 + timeoutSeconds: 1 + ports: + - containerPort: 6818 + hostPort: 6818 + resources: + limits: + nvidia.com/gpu: ${config.gpu_instances} + cpu: 3 + memory: "10Gi" + requests: + nvidia.com/gpu: ${config.gpu_instances} + cpu: 3 + memory: "10Gi" + volumeMounts: + - mountPath: /run/dbus/system_bus_socket + name: system-bus-socket + - mountPath: /tmp/munge.key + name: munge-key-secret + subPath: munge.key + - mountPath: /home + name: slurm-jobdir + securityContext: + privileged: true + hostNetwork: true + dnsPolicy: "None" + dnsConfig: + nameservers: + - "169.254.169.254" + searches: + - slurmd.${namespace}.svc.cluster.local + - slurmd1.${namespace}.svc.cluster.local + - slurmd2.${namespace}.svc.cluster.local + - 
svc.cluster.local + - cluster.local + - ${namespace}.svc.cluster.local + options: + - name: ndots + value: "5" + restartPolicy: Always + volumes: + - name: system-bus-socket + hostPath: + path: /run/dbus/system_bus_socket + - name: slurm-jobdir + persistentVolumeClaim: + claimName: slurm-shared-storage + - name: munge-key-secret + secret: + secretName: munge-key-secret + defaultMode: 0400 diff --git a/modules/slurm-nodeset/manifest-templates/04-svc-slurmd.yml b/modules/slurm-nodeset/manifest-templates/04-svc-slurmd.yml new file mode 100644 index 000000000..5abd1bdec --- /dev/null +++ b/modules/slurm-nodeset/manifest-templates/04-svc-slurmd.yml @@ -0,0 +1,39 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmd + name: ${name} + namespace: ${namespace} +spec: + ports: + - name: slurmd + port: 6818 + targetPort: 6818 + selector: + app.kubernetes.io/name: slurm + app.kubernetes.io/component: slurmd + clusterIP: None diff --git a/modules/slurm-nodeset/variables.tf b/modules/slurm-nodeset/variables.tf new file mode 100644 index 000000000..bf51e7a9e --- /dev/null +++ b/modules/slurm-nodeset/variables.tf @@ -0,0 +1,90 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "name" { + description = "Name used for Slurm worker nodes configuration" + type = string + nullable = false +} + +variable "templates_path" { + description = "Path where manifest templates will be read from. Set to null to use the default manifests." + type = string + default = null +} + +variable "config" { + type = object({ + type = string # Machine Type + instances = number # Number of Slurm instances + namespace = string # Cluster namespace + image = string #"Container image used for Slurm cluster pods" + accelerator = optional(object({ + type = string # "Needed when the configured instances is an N1-standard-* since GPUs are not known in advanced." 
+ count = number + })) + storage = optional(object({ + mount_path = optional(string, "/home") + type = optional(string, "filestore") + })) + }) + default = null +} + +variable "accelerator_types" { + type = map(object({ + type = optional(string) + count = optional(number) + cpu = optional(number) + memory = optional(number) + })) + + default = { + "g2-standard-4" = { + type = "nvidia-l4" + count = 1 + } + "g2-standard-8" = { + type = "nvidia-l4" + count = 1 + } + "g2-standard-12" = { + type = "nvidia-l4" + count = 1 + } + "g2-standard-16" = { + type = "nvidia-l4" + count = 1 + } + "g2-standard-24" = { + type = "nvidia-l4" + count = 2 + } + "g2-standard-32" = { + type = "nvidia-l4" + count = 1 + } + "g2-standard-48" = { + type = "nvidia-l4" + count = 4 + } + "g2-standard-96" = { + type = "nvidia-l4" + count = 8 + } + } +} + diff --git a/slurm-on-gke/.gitignore b/slurm-on-gke/.gitignore new file mode 100644 index 000000000..4b4e41cc8 --- /dev/null +++ b/slurm-on-gke/.gitignore @@ -0,0 +1,5 @@ +.terraform* +terraform.tfstate +terraform.tfstate.backup +terraform.tfvars +*/.terraform diff --git a/slurm-on-gke/README.md b/slurm-on-gke/README.md new file mode 100644 index 000000000..1a5a3a504 --- /dev/null +++ b/slurm-on-gke/README.md @@ -0,0 +1,445 @@ +# Slurm on GKE + +## Introduction + +This guide shows you how to deploy [Slurm](https://slurm.schedmd.com/documentation.html) on a Google Kubernetes Engine (GKE) cluster. + +Slurm (formerly known as Simple Linux Utility for Resource Management) is a powerful open-source workload manager that’s designed for Linux and Unix-like systems. It's used extensively in high performance computing (HPC) environments, including many of the world's supercomputers and large computer clusters. + +Slurm uses a centralized manager (`slurmctld`) to monitor resources and work, with an optional backup manager for fault tolerance. Each compute server (node) runs a `slurmd` daemon to execute work. 
An optional `slurmdbd` can record accounting information for multiple Slurm-managed clusters. More information about the Slurm architecture can be found on the [SchedMD documentation website](https://slurm.schedmd.com/overview.html#architecture). + +This guide is intended for platform administrators in an enterprise environment who are already managing Kubernetes or GKE clusters, and who need to set up Slurm clusters for AI/ML teams on Kubernetes. This guide is also for AI/ML startups that already use Kubernetes or GKE to run their workloads, such as inference workloads or web apps, and want to use their existing infrastructure to run training workloads with a Slurm interface. AI/ML teams that already use Slurm can continue to use a familiar interface while onboarding onto Kubernetes or GKE. + +## Benefits + +Because Slurm is a workload orchestrator, it might seem counterintuitive to run it on Kubernetes–and this scenario is not a common practice. However, a team in an enterprise or an AI startup might run Slurm on Kubernetes for the following reasons: + +* To avoid the sparse allocation of scarce resources, such as GPUs, between different infrastructures–for instance, splitting GPUs between Kubernetes clusters and Slurm clusters running on VMs. +* To help teams that are familiar with Slurm, and less familiar with Kubernetes, learn how to use Kubernetes more quickly. These teams typically already use Kubernetes to run other types of workloads, such as inference workloads, web apps, or stateful apps. + +This solution lets a platform administrator team with previous Kubernetes experience, or an AI/ML team with little-to-no Kubernetes experience, set up the cluster with ready-to-use Terraform modules. + +## Document scope + +This solution, and the Slurm setup presented here, does not support all the Slurm features, and is not intended to be used outside the AI/ML domain.
This guide is also not a recommendation document about how or when to use Slurm for HPC workloads. + +The following topics are also out of scope for this guide: + +* How to integrate the various combinations of the Google Cloud VM types. +* How to dynamically split Kubernetes nodes between two orchestrators. +* How to do any advanced configuration that’s different from the one that’s presented in the introduction. + +If you are searching for a guide or an automation tool that can help you set up Slurm on Google Cloud, with a focus on HPC workloads, we recommend that you use the [Google Cloud Cluster Toolkit](https://cloud.google.com/cluster-toolkit/docs/overview). + +## Solution architecture + +The implemented architecture is composed of multiple StatefulSets and Deployments in order to cover the different Slurm components. The following diagram shows a Slurm control plane and a data plane that are running in GKE. + +![Slurm on GKE - Architecture](solution-architecture.png) + +Although the function of each component won’t be described in detail, it’s important to highlight how each component is configured and how it can be customized. The previous diagram identifies two main components: the Slurm control plane and the data plane. + +* The Slurm control plane is hosted on `nodepool-nogpu-1`, and it contains at least three services: + * The `slurmctld-0` Pod runs `slurmctld`, which is piloted by the `slurmctld` StatefulSet. + * The `slurmdbd` Pod, which is controlled by a Deployment. + * The login Pod. +* The data plane consists of two different `slurmd-X` instances: + * The first instance, called `slurmd-00`, is hosted on the `G2-standard-4` node pool. + * The second instance, called `slurmd-01`, is hosted on the `N1-standard-8` node pool. + +### Control plane + +Before proceeding further, review the Terraform module configuration for this part of the Cluster.
 + +```HCL +module "slurm-cluster-001" { + source = "../modules/slurm-cluster/" + namespace = "slurm" + namespace_create = true + + cluster_config = { + name = "linux" + image = "IMAGE_URL_FROM_ARTIFACT_REGISTRY" + database = { + create = true + storage_size_gb = 1 + host = "mysql" + password = "SET_HERE_A_BASE64_ENCODED_PASSWORD" + } + storage = { + size_gb = 100 + type = "filestore" + mount_path = "/home" + } + munge = { + key = "PUT_HERE_YOUR_KEY" + } + } +} +``` + +By using `namespace` and `namespace_create`, you can specify where to run the control plane. Creating the namespace can be automatic if `namespace_create` is set to `true`. + +The `cluster_config` block, which the previous example code passes directly in the module for readability, specifies the following: + +* All the customizable options of the solution. +* The container image to use to run the cluster. +* The database. +* The storage. +* Munge parameters. + +The `database` block can auto-create a new MariaDB instance, which will be hosted on the cluster with an external volume. Alternatively, you can provide credentials to connect to a compatible external MySQL service, such as [CloudSQL](https://cloud.google.com/sql?hl=en). + +The `storage` block specifies where to mount the shared filesystem in the data plane pods (`slurmd`) that will execute the jobs. The configuration of all the Slurm components, except for the database, will auto-mount on the specified shared filesystem. + +The `munge` block specifies the Munge key that’s used for the secure communication between the cluster nodes. If you’re not familiar with it or with any previous Slurm setup, and you want to create a new one, note that the Munge key must be between 32 bytes and 1 KiB in size (a 1 KiB key is typical).
The Munge client has a [specific command](https://manpages.ubuntu.com/manpages/focal/man8/create-munge-key.8.html) to create a Munge key, and the SchedMD documentation also describes [another approach](https://slurm.schedmd.com/authentication.html#munge_setup) to creating a Munge key by using `dd`. + +For more information related to the image, see the Image section of this document. + +### Data plane and worker nodes + +The following streamlined configuration for a new set of worker nodes requires only a few parameters: + +```HCL +# example - 1 +module "slurm-workers-001" { + source = "../modules/slurm-nodeset" + name = "slurmd" + config = { + type = "g2-standard-4" + instances = 2 + namespace = module.slurm-cluster-001.namespace + image = var.config.image + } +} + +# example - 2 +module "slurm-workers-002" { + source = "../modules/slurm-nodeset" + name = "slurmd1" + config = { + type = "n1-standard-8" + instances = 1 + namespace = module.slurm-cluster-001.namespace + image = var.config.image + accelerator = { + type = "nvidia-tesla-t4" + count = 2 + } + } +} +``` + +The `config` block specifies nodeset names. To name nodesets, we recommend that you use names such as `slurmd`, `slurmd1`, or `slurmd2`. For more information about this topic, see the Limitations section of this document. + +In addition to nodeset names, you can use the `config` block to configure the following: + +* The type of the instance, which includes all the instance types that GKE supports. For example, N1, C2, A2, or A3. +* The number of instances to dedicate to Slurm. +* The namespace. +* The address of the Slurm image. + +The previous example creates two nodesets: the first nodeset is created on the `g2-standard-4` instances, and the second one is created on the `n1-standard-8 instance`s. 
Although the configurations for both instance types are similar, N1 instances require some additional parameters because they are machines that [are not bound to a specific GPU version](https://cloud.google.com/compute/docs/gpus). Because you can choose the GPU before the machine is created–for example, when you choose the `n1-standard` machine type–the Terraform module requires you to specify the number of GPUs and the related type. + +## Limitations + +Because Slurm is a workload orchestrator, it overlaps with some of the components, scope, and features of Kubernetes. For example, both Slurm and Kubernetes can scale up a new node to accommodate requested resources, or allocate resources within each single node. + +Although Kubernetes shouldn’t limit the number of StatefulSets that can be added to the Slurm cluster, this solution addresses DNS names and pointing by injecting the names of the StatefulSets directly into the manifests. This type of injection supports up to five static names, which include the three StatefulSets and two additional system domains. + +Because of this limitation, we recommend that you have no more than three nodesets per cluster and to always use nodeset names such as `slurmd`, `slurmd-1,` or `slurmd-2`. + +## Image + +The provided image configuration files, which are available in the `image` directory of the repository, contain all the needed Slurm binaries. It's a best practice to have one container image for each purpose, and Google recommends that you create different images for each purpose before you put this architecture in production. + +The provided Dockerfile and the `docker-entrypoint.sh` file are used to create the base Ubuntu-based image for the setup. Although it can be used, it's not maintained and should be considered only for use in experimentation and testing. + +Another best practice is to tag your images with the NVIDIA driver version that you will install. 
Doing so will help you manage different images because the tags match the NVIDIA driver that’s installed onto your GKE cluster. + +## Notes and requirements + +The current version of this guide does **not** address running the login pod with users different from root. +The following are required to be present, configured or available: + +* terraform v1.9.3 or newer +* docker 20.10.21 or newer +* kubectl +* A Google Cloud organization + +## Infrastructure and GKE + +This section describes the steps for creating and storing the container image. + +### Set up your environment + +In this tutorial, you use [Cloud Shell](https://cloud.google.com/shell) to manage resources that are hosted on Google Cloud. Cloud Shell is preinstalled with the software that you need for this tutorial, including [`kubectl`](https://kubernetes.io/docs/reference/kubectl/), the [Google Cloud CLI](https://cloud.google.com/sdk/gcloud), [Helm](https://helm.sh/), and [Terraform](https://cloud.google.com/docs/terraform). + +To set up your environment with Cloud Shell, follow these steps: + +1\. In the [Google Cloud console](http://console.cloud.google.com), launch a Cloud Shell session by clicking *Activate Cloud Shell*. This launches a session in the bottom pane of the Google Cloud console. + +2\. Set environment variables. + +```bash +export PROJECT_ID=YOUR_PROJECT_ID +export REGION=europe-west3 +``` + +Replace *YOUR_PROJECT_ID* with your Google Cloud [project ID](https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects). + +3\. Set the default environment variables. + +```bash +gcloud config set project ${PROJECT_ID} +``` + +4\. Clone the code repository. + +```bash +git clone https://github.com/GoogleCloudPlatform/ai-on-gke +``` + +### Create your cluster infrastructure + +In this section, you run a Terraform script to create a private, highly available, regional GKE cluster. 
+ +The Terraform module will also create a new project and anything that might be needed to set up the environment described in this guide. If you already have a GKE cluster where you can test the Slurm installation, you can skip this step. + +5\. Initialize Terraform. + +```bash +cd slurm-on-gke +cd infrastructure +terraform init +``` + +6\. Create the `terraform.tfvars` file with your own values. + +A `terraform.tfvars` file should be provided with all the values for the required variables. In the file, enter your own values as follows: + +```HCL +impersonate_service_account = "YOUR_SERVICE_ACCOUNT_ID" <-- if you are using one +region = "europe-west3" +project_id = "YOUR_PROJECT_ID" +billing_account_id = "YOUR_BILLING_ACCOUNT_ID" +folder_id = "folders/FOLDER_ID" +``` + +**Note:** Ensure your selected region or zone offers GPU availability. Consult the [Google Cloud documentation for a complete list](https://cloud.google.com/compute/docs/gpus/gpu-regions-zones). + +7\. After you fill out the file, use the following command to apply the Terraform configuration and create the infrastructure. + +```bash +terraform apply +``` + +When you are prompted, type `yes`. It might take several minutes for this command to complete and for the cluster to show a ready status. + +Terraform creates the following resources: + +* A Google Cloud project. +* A VPC network, a private subnet for the Kubernetes nodes, and a proxy-only subnet for the load balancers. +* A firewall rule to open the SSH protocol from the Identity-Aware Proxy (IAP) ranges. +* A router to access the internet through NAT. +* An Artifact Registry repository to host the Slurm image. +* A private GKE cluster in the `europe-west3` region. 
 +* One node pool with autoscaling enabled (1-2 nodes per zone, 1 node per zone minimum) +* One node pool with enabled autoscaling and GPUs (1-2 nodes per zone, 1 node per zone minimum) +* Two ServiceAccounts with logging, monitoring permissions, and Artifact Registry read permissions. +* Google Cloud Managed Service for Prometheus configuration for cluster monitoring. + +The output is similar to the following: + +```bash +... +Apply complete! Resources: 39 added, 0 changed, 0 destroyed. +``` + +An additional, commented-out Terraform configuration is already written over the `infrastructure/slurm.tf` file. The additional configuration is an example configuration for a `g2-standard-4` node pool. + +### Create the image + +In this section, you build and store the container image that you will use to deploy Slurm over the newly created or provided GKE cluster. + +To build the container image, use the following commands: + +```bash +cd .. # (you were in the infrastructure directory) +cd image +docker build -t europe-west3-docker.pkg.dev/$PROJECT_ID/slurm/slurmd:535 . +docker push europe-west3-docker.pkg.dev/$PROJECT_ID/slurm/slurmd:535 +``` + +The output is similar to the following: + +```bash +... +The push refers to repository [europe-west3-docker.pkg.dev/YOUR_PROJECT_ID/slurm/slurmd] +df1644670bb2: Pushed +74f700e9690e: Pushed +676e6ba12678: Pushed +578df7510db0: Pushed +6551dcc8d929: Pushed +ddcaaa531045: Pushed +98c2ee5d21b6: Pushed +866b7df6f372: Pushed +139722e64731: Pushed +87c242b383a9: Pushed +1b9b7340fee7: Pushed +535: digest: sha256:ced97f7cb5d0eba7114a1909c2he2e2ke21a6db1b36669a41f34a3 +size: 2632 +``` + +Note the address of your container image because it will be requested in the following steps. The address should be similar to the following: + +```bash +europe-west3-docker.pkg.dev/$PROJECT_ID/slurm/slurmd:535 +``` + +### Deploy Slurm + +In this section, you deploy the Slurm cluster over the newly created or provided GKE cluster. + +1\.
Return to the repository root directory. + +```bash +cd .. # (we were in the image directory) +``` + +2\. Create the `terraform.tfvars` file with your own values. + +A `terraform.tfvars` file should be provided with all the values for the required variables. In the file, enter your own values as follows: + +```HCL +region = "europe-west3" +project_id = "YOUR_PROJECT_ID" +cluster_name = "cluster-1" + +config = { + name = "linux" + image = "europe-west3-docker.pkg.dev/YOUR_PROJECT_ID/slurm/slurmd:535" + database = { + create = true + storage_size_gb = 1 + host = "mysql" + password = "SET_HERE_A_BASE64_ENCODED_PASSWORD" + } + storage = { + size_gb = 100 + type = "filestore" + mount_path = "/home" + } + munge = { + key = "PUT_HERE_YOUR_KEY" + } +} + +impersonate_service_account = "YOUR_SERVICE_ACCOUNT_ID" <-- if you are using one + +``` + +3\. Gather the credentials for the GKE cluster. + +```bash +gcloud container clusters get-credentials cluster-1 --region europe-west3 +``` + +4\. Initialize the Terraform configuration and apply it. + +```bash +terraform init +terraform apply +``` + +When you are prompted, type `yes`. It might take several seconds for this command to complete and for the cluster to show a ready status. + +The output is similar to the following: + +```bash +... +Apply complete! Resources: 17 added, 0 changed, 0 destroyed. +``` + +5\. Check that the Slurm cluster is being deployed. 
+ +```bash +kubectl get pods -n slurm -w +NAME READY STATUS RESTARTS AGE +login-96bffd678-nbqwp 0/1 Pending 0 32s +mysql-746bcd47c6-mxd4f 1/1 Running 0 2m28s +slurmctld-0 0/1 Pending 0 2m29s +slurmd1-0 0/1 Pending 0 2m27s +slurmdbd-7b67cf9b54-dj7p4 0/1 ContainerCreating 0 31s +``` + +After deployment is complete, the output is similar to the following: + +```bash +kubectl get pods -n slurm +NAME READY STATUS RESTARTS AGE +login-96bffd678-nbqwp 1/1 Running 0 4m12s +mysql-746bcd47c6-mxd4f 1/1 Running 0 6m8s +slurmctld-0 1/1 Running 0 4s +slurmd1-0 0/1 Running 0 19s +slurmdbd-7b67cf9b54-dj7p4 1/1 Running 0 4m11s +``` + +6\. Verify that the Slurm cluster is working properly by logging in to the login pod. + +```bash +kubectl -n slurm exec -it login-96bffd678-nbqwp -- bash +root@login:/opt# sinfo +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +all\* up infinite 79 idle~ slurmd-[0-39],slurmd1-[1-39] +all\* up infinite 1 idle slurmd1-0 +1gpunodes up infinite 40 idle slurmd-[0-39] +2gpunodes up infinite 39 idle~ slurmd1-[1-39] +2gpunodes up infinite 1 idle slurmd1-0 root@login:/opt +``` + +## Clean up + +To avoid incurring charges to your Google Cloud account for the resources that you used in this tutorial, either delete the project that contains the resources, or keep the project and delete the individual resources. + +### Delete the project + +The easiest way to avoid billing is to delete the project that you created for the tutorial. + +Caution: Deleting a project has the following effects: + +* Everything in the project is deleted. If you used an existing project for the tasks in this document, deleting the project also deletes any other work that you've done in the project. +* Custom project IDs are lost. When you created this project, you might have created a custom project ID that you want to use in the future. 
To preserve the URLs that use the project ID, such as an appspot.com URL, delete selected resources inside the project instead of deleting the whole project.
+
+If you plan to explore multiple architectures, tutorials, or quickstarts, reusing projects can help you avoid exceeding project quota limits.
+
+1. In the Google Cloud console, go to the **Manage resources** page.
+   [Go to Manage resources](https://console.cloud.google.com/iam-admin/projects).
+2. In the project list, select the project that you want to delete, and then click **Delete**.
+3. In the dialog, type the project ID, and then click **Shut down** to delete the project.
+
+### Delete the individual resources
+
+If you used an existing project and you don't want to delete it entirely, delete the
+individual resources.
+
+1\. Run the terraform destroy command to delete all the Slurm resources that you created in the previous steps:
+
+```bash
+cd slurm-on-gke
+terraform destroy
+```
+
+If you used an existing cluster, you can skip the following step.
+
+2\. Run the terraform destroy command on the infrastructure directory:
+
+```bash
+cd infrastructure
+terraform destroy
+```
+
+This step deletes all the resources that you created previously: the GKE cluster, the VPC network, the firewall rules, and the Google Cloud project.
+
+## License
+
+* The use of the assets contained in this repository is subject to compliance with [Google's AI Principles](https://ai.google/responsibility/principles/)
+* See [LICENSE](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/LICENSE)
+* This project is adapted from the [Stack HPC \- slurm k8s cluster project](https://github.com/stackhpc/slurm-k8s-cluster) by Giovanni Torres. Copied or derived files from the original repository, as specified within their headers, are licensed under the original MIT license and copyrighted by [Giovanni Torres](https://github.com/stackhpc/slurm-k8s-cluster/blob/main/LICENSE).
diff --git a/slurm-on-gke/image/Dockerfile b/slurm-on-gke/image/Dockerfile new file mode 100644 index 000000000..7f70fe7b5 --- /dev/null +++ b/slurm-on-gke/image/Dockerfile @@ -0,0 +1,121 @@ +# MIT License + +# Copyright (c) 2019 Giovanni Torres + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+FROM ubuntu:22.04
+
+# SLURM_TAG: Slurm git tag built from source below.
+# GOSU_VERSION: gosu release used by the entrypoint to drop root privileges.
+ARG SLURM_TAG=slurm-23.02
+ARG GOSU_VERSION=1.11
+
+
+# Toolchain and libraries to build and run Slurm: MUNGE (node authentication),
+# hwloc, PMIx (MPI launch support), MySQL client headers (slurmdbd accounting),
+# JSON-C, NVIDIA userspace libraries, MariaDB server, and OpenMPI.
+# NOTE(review): apt lists are not cleaned in this layer; consider
+# `rm -rf /var/lib/apt/lists/*` to shrink the image -- TODO confirm.
+RUN set -x \
+    && apt-get update \
+    && apt-get install -y \
+       wget \
+       gcc \
+       git \
+       make \
+       munge \
+       libmunge-dev \
+       python3-dev \
+       python3-pip \
+       python3 \
+       hwloc \
+       libhwloc-dev \
+       libpmix-dev \
+       libhttp-parser-dev \
+       libmysqlclient-dev \
+       libjson-c-dev \
+       psmisc \
+       bzip2 \
+       python3-http-parser \
+       nvidia-utils-535 \
+       nvidia-cuda-toolkit-gcc \
+       nvidia-cuda-dev \
+       libnvidia-compute-535 \
+       mariadb-server \
+       libdbus-1-dev \
+       openmpi-common \
+       openmpi-bin \
+       vim
+
+# Python build helpers (Cython for native extensions; nose test runner).
+RUN pip3 install Cython nose
+
+# Install gosu and verify the release signature before trusting the binary;
+# `gosu nobody true` is a smoke test that the binary works.
+RUN set -ex \
+    && wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" \
+    && wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64.asc" \
+    && export GNUPGHOME="$(mktemp -d)" \
+    && gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
+    && gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
+    && rm -rf "${GNUPGHOME}" /usr/local/bin/gosu.asc \
+    && chmod +x /usr/local/bin/gosu \
+    && gosu nobody true
+
+ENV SHELL=/bin/bash
+# Build Slurm from the pinned tag with MySQL accounting support, install the
+# shell completion, and return to the build root (sources removed next layer).
+RUN set -x \
+    && git clone -b ${SLURM_TAG} --single-branch --depth=1 https://github.com/SchedMD/slurm.git \
+    && cd slurm \
+    && ./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm \
+       --with-mysql_config=/usr/bin --libdir=/usr/lib64 \
+    && make install \
+    && install -D -m644 contribs/slurm_completion_help/slurm_completion.sh /etc/profile.d/slurm_completion.sh \
+    && cd ..
\
+    && rm -rf slurm
+
+# State, spool, and log directories Slurm expects at runtime; pre-create the
+# state files so slurmctld can open them, and hand everything to the slurm user.
+RUN mkdir /var/spool/slurmd \
+       /var/run/slurmd \
+       /var/run/slurmdbd \
+       /var/lib/slurmd \
+       /var/log/slurm \
+       /data \
+       /etc/slurm \
+    && touch /var/lib/slurmd/node_state \
+       /var/lib/slurmd/front_end_state \
+       /var/lib/slurmd/job_state \
+       /var/lib/slurmd/resv_state \
+       /var/lib/slurmd/trigger_state \
+       /var/lib/slurmd/assoc_mgr_state \
+       /var/lib/slurmd/assoc_usage \
+       /var/lib/slurmd/qos_usage \
+       /var/lib/slurmd/fed_mgr_state \
+    && useradd -r --uid=990 slurm \
+    && chown -R slurm:slurm /var/*/slurm*
+
+# Build the Prometheus Slurm exporter with a locally unpacked Go toolchain.
+# NOTE(review): Go 1.18 is pinned here -- confirm the exporter still builds
+# with it before bumping.
+WORKDIR /opt
+RUN export VERSION=1.18 OS=linux ARCH=amd64 \
+    && wget https://dl.google.com/go/go$VERSION.$OS-$ARCH.tar.gz \
+    && tar -xzvf go$VERSION.$OS-$ARCH.tar.gz \
+    && export PATH=$PWD/go/bin:$PATH \
+    && git clone https://github.com/vpenso/prometheus-slurm-exporter.git \
+    && cd prometheus-slurm-exporter \
+    && go build
+
+RUN mv /opt/prometheus-slurm-exporter/prometheus-slurm-exporter /usr/local/bin
+
+# Runtime socket directory for munged; use the ":" owner separator (the "."
+# form is deprecated in GNU coreutils).
+RUN mkdir -p /run/munge \
+    && chown munge:munge /run/munge
+
+VOLUME /etc/slurm
+COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+RUN chmod 755 /usr/local/bin/docker-entrypoint.sh
+ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
+
+# Default role when no command is given; the entrypoint dispatches on it.
+CMD ["slurmdbd"]
\ No newline at end of file
diff --git a/slurm-on-gke/image/docker-entrypoint.sh b/slurm-on-gke/image/docker-entrypoint.sh
new file mode 100644
index 000000000..66ebac5d6
--- /dev/null
+++ b/slurm-on-gke/image/docker-entrypoint.sh
@@ -0,0 +1,146 @@
+# MIT License
+
+# Copyright (c) 2019 Giovanni Torres
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above
copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# NOTE(review): this shebang is not on line 1 of the file, so it has no effect;
+# consider moving it above the license header.
+#!/bin/bash
+set -euo pipefail
+
+# Install the shared MUNGE key and start munged (as the munge user) so that
+# Slurm daemons can authenticate to each other. Extra args are passed to munged.
+function start_munge(){
+
+    echo "---> Copying MUNGE key ..."
+    cp /tmp/munge.key /etc/munge/munge.key
+    chown munge:munge /etc/munge/munge.key
+
+    echo "---> Starting the MUNGE Authentication service (munged) ..."
+    gosu munge /usr/sbin/munged "$@"
+}
+
+# Dispatch on the container role given as $1 (the image CMD defaults this to
+# "slurmdbd"). NOTE(review): with `set -u`, invoking the script with no
+# arguments at all aborts on the unset "$1".
+if [ "$1" = "slurmdbd" ]
+then
+
+    start_munge
+
+    echo "---> Starting the Slurm Database Daemon (slurmdbd) ..."
+
+    # Assemble slurmdbd.conf, appending the DB password from the environment;
+    # slurmdbd requires the file to be owned by slurm and mode 600.
+    cp /tmp/slurmdbd.conf /etc/slurm/slurmdbd.conf
+    echo "StoragePass=${StoragePass}" >> /etc/slurm/slurmdbd.conf
+    chown slurm:slurm /etc/slurm/slurmdbd.conf
+    chmod 600 /etc/slurm/slurmdbd.conf
+    {
+        # slurmdbd.conf is key=value, so sourcing it exposes StorageHost,
+        # StorageUser and StoragePass to this shell.
+        . /etc/slurm/slurmdbd.conf
+        # Redirect stdout first, then duplicate stderr onto it, so retries are
+        # silent (the original `2>&1 > /dev/null` left stderr on the console).
+        until echo "SELECT 1" | mysql -h "$StorageHost" -u"$StorageUser" -p"$StoragePass" > /dev/null 2>&1
+        do
+            echo "-- Waiting for database to become active ..."
+            sleep 2
+        done
+    }
+    echo "-- Database is now active ..."
+
+    # Run slurmdbd in the foreground as the slurm user; remaining args pass through.
+    exec gosu slurm /usr/sbin/slurmdbd -D "${@:2}"
+
+elif [ "$1" = "slurmctld" ]
+then
+
+    start_munge
+
+    echo "---> Waiting for slurmdbd to become active before starting slurmctld ..."
+
+    # Bash /dev/tcp probe: succeeds once slurmdbd accepts connections on 6819.
+    until 2>/dev/null >/dev/tcp/slurmdbd/6819
+    do
+        echo "-- slurmdbd is not available. Sleeping ..."
+        sleep 2
+    done
+    echo "-- slurmdbd is now active ..."
+
+    echo "---> Setting permissions for state directory ..."
+    chown slurm:slurm /var/spool/slurmctld
+
+    echo "---> Starting the Slurm Controller Daemon (slurmctld) ..."
+    # Slurm 17.02 dropped support for the -i flag in later releases' sense;
+    # pick the invocation matching the installed version.
+    if /usr/sbin/slurmctld -V | grep -q '17.02' ; then
+        exec gosu slurm /usr/sbin/slurmctld -D "${@:2}"
+    else
+        exec gosu slurm /usr/sbin/slurmctld -i -D "${@:2}"
+    fi
+
+elif [ "$1" = "slurmd" ]
+then
+    echo "---> Set shell resource limits ..."
+    #ulimit -l unlimited
+    #ulimit -s unlimited
+    #ulimit -n 131072
+    #ulimit -a
+
+    start_munge
+
+    # Locate this pod's cgroup under kubepods.slice and pre-create the nested
+    # slurmstepd scope directories slurmd expects. $() replaces the original
+    # deprecated backticks; behavior is identical.
+    cgroup_dir=$(find /sys/fs/cgroup/kubepods.slice -type d -name "kubepods-pod*")
+    mkdir -p "$cgroup_dir/system.slice/slurmstepd.scope/system.slice/slurmstepd.scope"
+    mkdir -p "/var/spool/slurmd"
+
+    echo "---> Starting the Slurm Node Daemon (slurmd) ..."
+    echo "${@:1}"
+    # Configless mode (-Z): fetch config from slurmctld-0; register under the
+    # pod's name. Quoted to survive any whitespace in POD_NAME.
+    exec slurmd -D -s -vvv --conf-server="slurmctld-0:6820-6830" -Z -N "$POD_NAME"
+
+elif [ "$1" = "login" ]
+then
+
+    # Login pods only need munge for client commands; idle forever afterwards.
+    start_munge
+    while true; do sleep 30; done;
+
+elif [ "$1" = "check-queue-hook" ]
+then
+    start_munge
+
+    # Drain all nodes, then report success only when no jobs are in flight
+    # (exit 1 keeps the upgrade hook retrying).
+    scontrol update NodeName=all State=DRAIN Reason="Preventing new jobs running before upgrade"
+
+    RUNNING_JOBS=$(squeue --states=RUNNING,COMPLETING,CONFIGURING,RESIZING,SIGNALING,STAGE_OUT,STOPPED,SUSPENDED --noheader --array | wc --lines)
+
+    if [[ $RUNNING_JOBS -eq 0 ]]
+    then
+        exit 0
+    else
+        exit 1
+    fi
+
+elif [ "$1" = "undrain-nodes-hook" ]
+then
+    start_munge
+    scontrol update NodeName=all State=UNDRAIN
+    exit 0
+
+elif [ "$1" = "generate-keys-hook" ]
+then
+    # Generate SSH host keys and publish them as a Kubernetes secret
+    # (idempotent via dry-run | apply).
+    mkdir -p ./temphostkeys/etc/ssh
+    ssh-keygen -A -f ./temphostkeys
+    kubectl create secret generic host-keys-secret \
+        --dry-run=client \
+        --from-file=./temphostkeys/etc/ssh \
+        -o yaml | \
+        kubectl apply -f -
+
+    exit 0
+
+elif [ "$1" = "debug" ]
+then
+    start_munge --foreground
+
+else
+    exec "$@"
+fi
\ No newline at end of file
diff --git a/slurm-on-gke/infrastructure/main.tf b/slurm-on-gke/infrastructure/main.tf
new file mode 100644
index 000000000..0ff4d291b
--- /dev/null
+++ b/slurm-on-gke/infrastructure/main.tf
@@ -0,0 +1,196 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module "project" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/project?ref=v33.0.0" + billing_account = var.billing_account_id + name = var.project_id + parent = var.folder_id + services = [ + "compute.googleapis.com", + "stackdriver.googleapis.com", + "container.googleapis.com", + "file.googleapis.com", + "servicenetworking.googleapis.com" + ] +} + +module "vpc" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/net-vpc?ref=v33.0.0" + project_id = module.project.project_id + name = "default" + subnets = [ + { + ip_cidr_range = "10.0.0.0/24" + name = "subnet-1" + region = var.region + secondary_ip_ranges = { + pods = "172.16.0.0/20" + services = "192.168.0.0/24" + } + }, + { + ip_cidr_range = "10.10.0.0/24" + name = "subnet-lb-1" + region = var.region + } + ] + subnets_proxy_only = [ + { + ip_cidr_range = "10.0.1.0/24" + name = "regional-proxy" + region = var.region + active = true + } + ] +} + + + +module "firewall" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/net-vpc-firewall?ref=v33.0.0" + project_id = module.project.project_id + network = module.vpc.network.name + default_rules_config = { + admin_ranges = ["10.0.0.0/8"] + } + ingress_rules = { + # implicit allow action + allow-ingress-ssh = { + description = "Allow SSH from IAP" + source_ranges = ["35.235.240.0/20"] + rules = [{ protocol = "tcp", ports = [22] }] + } + } +} + +module "nat" { + source = 
"github.com/terraform-google-modules/cloud-foundation-fabric//modules/net-cloudnat?ref=v33.0.0" + project_id = module.project.project_id + region = var.region + name = "default" + router_network = module.vpc.network.self_link +} + +module "docker_artifact_registry" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/artifact-registry?ref=v33.0.0" + project_id = module.project.project_id + location = var.region + name = "slurm" + format = { docker = { standard = {} } } +} + +module "cluster_nodepool_sa" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/iam-service-account?ref=v33.0.0" + project_id = module.project.project_id + name = "cluster-nodepool-sa" + iam_project_roles = { + "${module.project.project_id}" = [ + "roles/monitoring.metricWriter", + "roles/logging.logWriter", + "roles/artifactregistry.reader", + ] + } +} + + +module "cluster-1" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/gke-cluster-standard?ref=v33.0.0" + project_id = module.project.project_id + name = "cluster-1" + location = var.region + release_channel = "RAPID" + deletion_protection = false + vpc_config = { + network = module.vpc.self_link + subnetwork = module.vpc.subnets["${var.region}/subnet-1"].self_link + secondary_range_names = { + pods = "pods" + services = "services" + } + master_ipv4_cidr_block = "172.19.27.0/28" + } + enable_addons = { + gce_persistent_disk_csi_driver = true + http_load_balancing = true + horizontal_pod_autoscaling = true + gcp_filestore_csi_driver = true + gcs_fuse_csi_driver = true + } + private_cluster_config = { + enable_private_endpoint = false + master_global_access = true + } + enable_features = { + dataplane_v2 = true + workload_identity = true + image_streaming = true + intranode_visibility = true + + dns = { + provider = "CLOUD_DNS" + scope = "CLUSTER_SCOPE" + } + } + + backup_configs = { + enable_backup_agent = false + } + monitoring_config = { + 
enable_api_server_metrics = true + enable_controller_manager_metrics = true + enable_scheduler_metrics = true + } + logging_config = { + enable_workloads_logs = true + } +} + +module "cluster-1-nodepool-1" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/gke-nodepool?ref=v33.0.0" + project_id = module.project.project_id + cluster_name = module.cluster-1.name + location = var.region + name = "nodepool-1" + service_account = { + create = false + email = module.cluster_nodepool_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + "https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/cloud-platform" + ] + } + node_config = { + machine_type = "n1-standard-8" + disk_size_gb = 100 + disk_type = "pd-ssd" + ephemeral_ssd_count = 1 + gvnic = true + } + nodepool_config = { + autoscaling = { + max_node_count = 10 + min_node_count = 2 + } + management = { + auto_repair = true + auto_upgrade = true + } + } +} + diff --git a/slurm-on-gke/infrastructure/providers.tf b/slurm-on-gke/infrastructure/providers.tf new file mode 100644 index 000000000..73b326e04 --- /dev/null +++ b/slurm-on-gke/infrastructure/providers.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +provider "google" { + impersonate_service_account = var.impersonate_service_account +} + +provider "google-beta" { + impersonate_service_account = var.impersonate_service_account +} diff --git a/slurm-on-gke/infrastructure/slurm.tf b/slurm-on-gke/infrastructure/slurm.tf new file mode 100644 index 000000000..0dfcbe9d1 --- /dev/null +++ b/slurm-on-gke/infrastructure/slurm.tf @@ -0,0 +1,111 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +module "slurm_nodepool_sa" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/iam-service-account?ref=v33.0.0" + project_id = module.project.project_id + name = "slurm-nodepool-sa" + # non-authoritative roles granted *to* the service accounts on other resources + iam_project_roles = { + "${module.project.project_id}" = [ + "roles/monitoring.metricWriter", + "roles/logging.logWriter", + "roles/artifactregistry.reader", + ] + } +} + +module "cluster-1-nodepool-2" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/gke-nodepool?ref=v33.0.0" + project_id = module.project.project_id + cluster_name = module.cluster-1.name + location = var.region + name = "slurm-001" + service_account = { + create = false + email = module.slurm_nodepool_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + "https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/cloud-platform" + ] + } + node_locations = ["${var.region}-b"] + node_config = { + machine_type = "n1-standard-8" + disk_size_gb = 100 + disk_type = "pd-ssd" + gvnic = true + spot = false + guest_accelerator = { + type = "nvidia-tesla-t4" + count = 2 + gpu_driver = { + version = "DEFAULT" + } + } + } + + nodepool_config = { + autoscaling = { + max_node_count = 10 + min_node_count = 1 + } + management = { + auto_repair = true + auto_upgrade = true + } + } +} + +module "cluster-1-nodepool-3" { + source = "github.com/terraform-google-modules/cloud-foundation-fabric//modules/gke-nodepool?ref=v33.0.0" + project_id = module.project.project_id + cluster_name = module.cluster-1.name + location = var.region + name = "slurm-002" + service_account = { + create = false + email = module.slurm_nodepool_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + 
"https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/cloud-platform" + ] + } + node_locations = ["${var.region}-b"] + node_config = { + machine_type = "g2-standard-4" + disk_size_gb = 100 + disk_type = "pd-ssd" + gvnic = true + spot = false + } + + nodepool_config = { + autoscaling = { + max_node_count = 10 + min_node_count = 2 + } + management = { + auto_repair = true + auto_upgrade = true + } + } +} diff --git a/slurm-on-gke/infrastructure/variables.tf b/slurm-on-gke/infrastructure/variables.tf new file mode 100644 index 000000000..b95d2cf08 --- /dev/null +++ b/slurm-on-gke/infrastructure/variables.tf @@ -0,0 +1,46 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +variable "billing_account_id" { + description = "Google Cloud Billing Account ID" + type = string + nullable = false +} + +variable "folder_id" { + description = "Google Cloud Folder ID" + type = string + nullable = false +} + +variable "impersonate_service_account" { + description = "Service account to be used while using Google Cloud APIs" + type = string + nullable = true + default = null +} + +variable "region" { + description = "Google Cloud Region where the GKE cluster is located" + type = string + nullable = false +} + +variable "project_id" { + description = "Google Cloud Project ID" + type = string + nullable = false +} diff --git a/slurm-on-gke/main.tf b/slurm-on-gke/main.tf new file mode 100644 index 000000000..6aa01b7a9 --- /dev/null +++ b/slurm-on-gke/main.tf @@ -0,0 +1,49 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +module "slurm-cluster-001" { + source = "../modules/slurm-cluster/" + namespace = "slurm" + namespace_create = true + + cluster_config = var.config +} + +module "slurm-workers-001" { + source = "../modules/slurm-nodeset" + name = "slurmd" + config = { + type = "g2-standard-4" + instances = 2 + namespace = module.slurm-cluster-001.namespace + image = var.config.image + } +} + +module "slurm-workers-002" { + source = "../modules/slurm-nodeset" + name = "slurmd1" + config = { + type = "n1-standard-8" + instances = 1 + namespace = module.slurm-cluster-001.namespace + image = var.config.image + accelerator = { + type = "nvidia-tesla-t4" + count = 2 + } + } +} diff --git a/slurm-on-gke/providers.tf b/slurm-on-gke/providers.tf new file mode 100644 index 000000000..1a2f18ccf --- /dev/null +++ b/slurm-on-gke/providers.tf @@ -0,0 +1,62 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+terraform {
+  required_version = "~> 1.9.3"
+  required_providers {
+    # Explicit registry namespaces; the bare "google" shorthand resolves to
+    # hashicorp/google anyway, but the full address is the recommended form.
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 6.9.0"
+    }
+    google-beta = {
+      source  = "hashicorp/google-beta"
+      version = "~> 6.9.0"
+    }
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.33.0"
+    }
+  }
+}
+
+provider "google" {
+  impersonate_service_account = var.impersonate_service_account
+}
+
+provider "google-beta" {
+  impersonate_service_account = var.impersonate_service_account
+}
+
+# Only needed to mint an access token when connecting through a fleet host.
+data "google_client_config" "identity" {
+  count = var.credentials_config.fleet_host != null ? 1 : 0
+}
+
+# Kubernetes provider: either a kubeconfig path/context or a fleet host +
+# access token, depending on which credentials_config branch is set.
+provider "kubernetes" {
+  config_path = (
+    var.credentials_config.kubeconfig == null
+    ? null
+    : pathexpand(var.credentials_config.kubeconfig.path)
+  )
+  config_context = try(
+    var.credentials_config.kubeconfig.context, null
+  )
+  host = (
+    var.credentials_config.fleet_host == null
+    ? null
+    : var.credentials_config.fleet_host
+  )
+  token = try(data.google_client_config.identity.0.access_token, null)
+}
diff --git a/slurm-on-gke/solution-architecture.png b/slurm-on-gke/solution-architecture.png
new file mode 100644
index 000000000..54c806901
Binary files /dev/null and b/slurm-on-gke/solution-architecture.png differ
diff --git a/slurm-on-gke/variables.tf b/slurm-on-gke/variables.tf
new file mode 100644
index 000000000..51f78fde9
--- /dev/null
+++ b/slurm-on-gke/variables.tf
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + + +variable "cluster_name" { + description = "Cluster Name to use" + type = string + nullable = false +} + +variable "project_id" { + description = "Google Cloud Project ID" + type = string + nullable = false +} + +variable "region" { + description = "Google Cloud Region where the GKE cluster is located" + type = string + nullable = false +} + +variable "impersonate_service_account" { + description = "Service account to be used while using Google Cloud APIs" + type = string + nullable = true + default = null +} + +variable "config" { + description = "Configure Slurm cluster statefulset parameters." + type = object({ + name = optional(string, "linux") + image = optional(string, "") + database = object({ + create = optional(bool, true) + host = optional(string, "mysql") + user = optional(string, "slurm") + password = optional(string, "") + storage_size_gb = optional(number, 1) + }) + munge = object({ + key = optional(string, "") + }) + storage = object({ + type = optional(string, "filestore") + size_gb = optional(number, 100) + }) + }) + nullable = false +} + +variable "credentials_config" { + description = "Configure how Terraform authenticates to the cluster." + type = object({ + fleet_host = optional(string) + kubeconfig = optional(object({ + context = optional(string) + path = optional(string, "~/.kube/config") + })) + }) + nullable = false + validation { + condition = ( + (var.credentials_config.fleet_host != null) != + (var.credentials_config.kubeconfig != null) + ) + error_message = "Exactly one of fleet host or kubeconfig must be set." + } + default = { + kubeconfig = { + path = "~/.kube/config" + } + } +}