From 81ce0c86e9b63b41ee4537036d2ac853ef87711a Mon Sep 17 00:00:00 2001 From: Laurent Grangeau Date: Mon, 9 Sep 2024 18:28:56 +0200 Subject: [PATCH 1/3] feat: add deployment of nvidia nim --- .../terraform/features/nim/README.md | 1 + .../terraform/features/nim/main.tf | 130 ++++++++++++++++++ .../terraform/features/nim/outputs.tf | 3 + .../terraform/features/nim/providers.tf | 18 +++ .../terraform/features/nim/variables.tf | 50 +++++++ 5 files changed, 202 insertions(+) create mode 100644 best-practices/ml-platform/terraform/features/nim/README.md create mode 100644 best-practices/ml-platform/terraform/features/nim/main.tf create mode 100644 best-practices/ml-platform/terraform/features/nim/outputs.tf create mode 100644 best-practices/ml-platform/terraform/features/nim/providers.tf create mode 100644 best-practices/ml-platform/terraform/features/nim/variables.tf diff --git a/best-practices/ml-platform/terraform/features/nim/README.md b/best-practices/ml-platform/terraform/features/nim/README.md new file mode 100644 index 000000000..fb819e930 --- /dev/null +++ b/best-practices/ml-platform/terraform/features/nim/README.md @@ -0,0 +1 @@ +# Deploy NVIDIA NIMs on GKE \ No newline at end of file diff --git a/best-practices/ml-platform/terraform/features/nim/main.tf b/best-practices/ml-platform/terraform/features/nim/main.tf new file mode 100644 index 000000000..06d11b26d --- /dev/null +++ b/best-practices/ml-platform/terraform/features/nim/main.tf @@ -0,0 +1,130 @@ +data "google_container_cluster" "nim-llm" { + name = var.cluster-name + location = var.cluster-location +} + +data "google_client_config" "current" { + +} + +resource "kubernetes_namespace" "nim" { + metadata { + name = var.kubernetes-namespace + } +} + +resource "kubernetes_secret" "ngc-secret" { + metadata { + name = "ngc-secret" + namespace = kubernetes_namespace.nim.metadata.0.name + } + + type = "kubernetes.io/dockerconfigjson" + + data = { + ".dockerconfigjson" = jsonencode({ + "auths" = { + "nvcr.io" = 
{ + "username" = "$oauthtoken" + "password" = var.ngc-api-key + "auth" = base64encode("$oauthtoken:${var.ngc-api-key}") + } + } + }) + } + + depends_on = [kubernetes_namespace.nim] +} + +resource "kubernetes_secret" "ngc-api" { + metadata { + name = "ngc-api" + namespace = kubernetes_namespace.nim.metadata.0.name + } + + type = "Opaque" + + data = { + NGC_API_KEY = var.ngc-api-key + } + + depends_on = [kubernetes_namespace.nim] +} + +resource "kubernetes_storage_class" "name" { + metadata { + name = "hyperdisk-ml" + } + + parameters = { + type = "hyperdisk-ml" + } + + storage_provisioner = "pd.csi.storage.gke.io" + allow_volume_expansion = false + reclaim_policy = "Delete" + volume_binding_mode = "WaitForFirstConsumer" +} + +resource "kubernetes_persistent_volume_claim" "name" { + metadata { + generate_name = "pvc-nim-" + namespace = kubernetes_namespace.nim.metadata.0.name + } + + spec { + access_modes = ["ReadWriteOnce"] + storage_class_name = kubernetes_storage_class.name.metadata.0.name + resources { + requests = { + storage = "100Gi" + } + } + } + wait_until_bound = false +} + +resource "helm_release" "nim-release" { + name = "nim" + namespace = kubernetes_namespace.nim.metadata.0.name + chart = "https://helm.ngc.nvidia.com/nim/charts/nim-llm-${var.chart-version}.tgz" + repository_username = "$oauthtoken" + repository_password = var.ngc-api-key + + set { + name = "image.repository" + value = "nvcr.io/nim/${var.image-name}" + } + + set { + name = "image.tag" + value = var.image-tag + } + + set { + name = "model.name" + value = var.image-name + } + + set { + name = "service.type" + value = "LoadBalancer" + } + + set { + name = "resources.limits.nvidia\\.com/gpu" + value = var.gpu-limits + } + + set { + name = "persistence.enabled" + value = true + } + + set { + name = "persistence.existingClaim" + value = kubernetes_persistent_volume_claim.name.metadata.0.name + } + + depends_on = [kubernetes_namespace.nim] +} diff --git 
a/best-practices/ml-platform/terraform/features/nim/outputs.tf b/best-practices/ml-platform/terraform/features/nim/outputs.tf new file mode 100644 index 000000000..bb81ed01b --- /dev/null +++ b/best-practices/ml-platform/terraform/features/nim/outputs.tf @@ -0,0 +1,3 @@ +output "inference-url" { + value = "" +} \ No newline at end of file diff --git a/best-practices/ml-platform/terraform/features/nim/providers.tf b/best-practices/ml-platform/terraform/features/nim/providers.tf new file mode 100644 index 000000000..8cbecaf81 --- /dev/null +++ b/best-practices/ml-platform/terraform/features/nim/providers.tf @@ -0,0 +1,18 @@ +provider "google" { + project = var.google-project +} + +provider "kubernetes" { + host = "https://${data.google_container_cluster.nim-llm.endpoint}" + cluster_ca_certificate = base64decode(data.google_container_cluster.nim-llm.master_auth.0.cluster_ca_certificate) + token = data.google_client_config.current.access_token +} + +provider "helm" { + kubernetes { + host = "https://${data.google_container_cluster.nim-llm.endpoint}" + cluster_ca_certificate = base64decode(data.google_container_cluster.nim-llm.master_auth.0.cluster_ca_certificate) + token = data.google_client_config.current.access_token + } +} + diff --git a/best-practices/ml-platform/terraform/features/nim/variables.tf b/best-practices/ml-platform/terraform/features/nim/variables.tf new file mode 100644 index 000000000..eceb2dc8b --- /dev/null +++ b/best-practices/ml-platform/terraform/features/nim/variables.tf @@ -0,0 +1,50 @@ +variable "cluster-name" { + description = "The name of the cluster NIM will be deployed to" + type = string +} + +variable "cluster-location" { + description = "The location of the cluster NIM will be deployed to" + type = string +} + +variable "google-project" { + description = "The name of the google project that contains the cluster NIM will be deployed to" + type = string +} + +variable "kubernetes-namespace" { + description = "The namespace where NIM will 
be deployed" + default = "nim" + type = string +} + +variable "gpu-limits" { + description = "Number of GPUs that will be presented to the model" + default = 1 + type = number +} + +variable "ngc-api-key" { + description = "Your NGC API key" + type = string + sensitive = true +} + +variable "chart-version" { + description = "The version of the chart" + default = "1.1.2" + type = string +} + +variable "image-name" { + description = "The name of the image to be deployed by NIM. Should be /" + default = "meta/llama3-8b-instruct" + type = string +} + +variable "image-tag" { + description = "The tag of the image to be deployed by NIM" + default = "1.0.0" + type = string +} From e79701766b73ea967646cb0538877f7a8d8ebf35 Mon Sep 17 00:00:00 2001 From: Laurent Grangeau Date: Tue, 10 Sep 2024 10:37:00 +0200 Subject: [PATCH 2/3] fix: follow terraform best practices --- .../terraform/features/nim/main.tf | 32 +++++++++---------- .../terraform/features/nim/outputs.tf | 2 +- .../terraform/features/nim/providers.tf | 2 +- .../terraform/features/nim/variables.tf | 22 ++++++------- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/best-practices/ml-platform/terraform/features/nim/main.tf b/best-practices/ml-platform/terraform/features/nim/main.tf index 06d11b26d..3c6ea0fcf 100644 --- a/best-practices/ml-platform/terraform/features/nim/main.tf +++ b/best-practices/ml-platform/terraform/features/nim/main.tf @@ -1,6 +1,6 @@ -data "google_container_cluster" "nim-llm" { - name = var.cluster-name - location = var.cluster-location +data "google_container_cluster" "nim_llm" { + name = var.cluster_name + location = var.cluster_location } data "google_client_config" "current" { @@ -9,11 +9,11 @@ data "google_client_config" "current" { resource "kubernetes_namespace" "nim" { metadata { - name = var.kubernetes-namespace + name = var.kubernetes_namespace } } -resource "kubernetes_secret" "ngc-secret" { +resource "kubernetes_secret" "ngc_secret" { metadata { name = "ngc-secret" 
namespace = kubernetes_namespace.nim.metadata.0.name @@ -26,8 +26,8 @@ resource "kubernetes_secret" "ngc-secret" { "auths" = { "nvcr.io" = { "username" = "$oauthtoken" - "password" = var.ngc-api-key - "auth" = base64encode("$oauthtoken:${var.ngc-api-key}") + "password" = var.ngc_api_key + "auth" = base64encode("$oauthtoken:${var.ngc_api_key}") } } }) @@ -36,7 +36,7 @@ resource "kubernetes_secret" "ngc-secret" { depends_on = [kubernetes_namespace.nim] } -resource "kubernetes_secret" "ngc-api" { +resource "kubernetes_secret" "ngc_api" { metadata { name = "ngc-api" namespace = kubernetes_namespace.nim.metadata.0.name @@ -45,7 +45,7 @@ resource "kubernetes_secret" "ngc-api" { type = "Opaque" data = { - NGC_API_KEY = var.ngc-api-key + NGC_API_KEY = var.ngc_api_key } depends_on = [kubernetes_namespace.nim] @@ -84,26 +84,26 @@ resource "kubernetes_persistent_volume_claim" "name" { wait_until_bound = false } -resource "helm_release" "nim-release" { +resource "helm_release" "nim_release" { name = "nim" namespace = kubernetes_namespace.nim.metadata.0.name - chart = "https://helm.ngc.nvidia.com/nim/charts/nim-llm-${var.chart-version}.tgz" + chart = "https://helm.ngc.nvidia.com/nim/charts/nim-llm-${var.chart_version}.tgz" repository_username = "$oauthtoken" - repository_password = var.ngc-api-key + repository_password = var.ngc_api_key set { name = "image.repository" - value = "nvcr.io/nim/${var.image-name}" + value = "nvcr.io/nim/${var.image_name}" } set { name = "image.tag" - value = var.image-tag + value = var.image_tag } set { name = "model.name" - value = var.image-name + value = var.image_name } set { @@ -113,7 +113,7 @@ resource "helm_release" "nim-release" { set { name = "resources.limits.nvidia\\.com/gpu" - value = var.gpu-limits + value = var.gpu_limits } set { diff --git a/best-practices/ml-platform/terraform/features/nim/outputs.tf b/best-practices/ml-platform/terraform/features/nim/outputs.tf index bb81ed01b..51c4ed400 100644 --- 
a/best-practices/ml-platform/terraform/features/nim/outputs.tf +++ b/best-practices/ml-platform/terraform/features/nim/outputs.tf @@ -1,3 +1,3 @@ -output "inference-url" { +output "inference_url" { value = "" } \ No newline at end of file diff --git a/best-practices/ml-platform/terraform/features/nim/providers.tf b/best-practices/ml-platform/terraform/features/nim/providers.tf index 8cbecaf81..c2ba332b3 100644 --- a/best-practices/ml-platform/terraform/features/nim/providers.tf +++ b/best-practices/ml-platform/terraform/features/nim/providers.tf @@ -1,5 +1,5 @@ provider "google" { - project = var.google-project + project = var.google_project } provider "kubernetes" { diff --git a/best-practices/ml-platform/terraform/features/nim/variables.tf b/best-practices/ml-platform/terraform/features/nim/variables.tf index eceb2dc8b..505af9331 100644 --- a/best-practices/ml-platform/terraform/features/nim/variables.tf +++ b/best-practices/ml-platform/terraform/features/nim/variables.tf @@ -1,49 +1,49 @@ -variable "cluster-name" { +variable "cluster_name" { description = "The name of the cluster NIM will be deployed to" type = string } -variable "cluster-location" { +variable "cluster_location" { description = "The location of the cluster NIM will be deployed to" type = string } -variable "google-project" { +variable "google_project" { description = "The name of the google project that contains the cluster NIM will be deployed to" type = string } -variable "kubernetes-namespace" { +variable "kubernetes_namespace" { description = "The namespace where NIM will be deployed" default = "nim" type = string } -variable "gpu-limits" { +variable "gpu_limits" { description = "Number of GPUs that will be presented to the model" - default = 1 - type = number + default = 1 + type = number } -variable "ngc-api-key" { +variable "ngc_api_key" { description = "Your NGC API key" type = string sensitive = true } -variable "chart-version" { +variable "chart_version" { description = "The version of 
the chart" default = "1.1.2" type = string } -variable "image-name" { +variable "image_name" { description = "The name of the image to be deployed by NIM. Should be /" default = "meta/llama3-8b-instruct" type = string } -variable "image-tag" { +variable "image_tag" { description = "The tag of the image to be deployed by NIM" default = "1.0.0" type = string From 3cb2042698515cbe2208995d12cc8e089f9ddf11 Mon Sep 17 00:00:00 2001 From: Laurent Grangeau Date: Thu, 12 Sep 2024 17:02:12 +0200 Subject: [PATCH 3/3] fix: peer reviews --- .../terraform/features/nim/README.md | 23 +++++++++- .../terraform/features/nim/main.tf | 42 ++++++++++--------- .../terraform/features/nim/outputs.tf | 18 +++++++- .../terraform/features/nim/providers.tf | 32 +++++++++++--- .../terraform/features/nim/variables.tf | 14 +++++++ .../terraform/features/nim/versions.tf | 30 +++++++++++++ 6 files changed, 131 insertions(+), 28 deletions(-) create mode 100644 best-practices/ml-platform/terraform/features/nim/versions.tf diff --git a/best-practices/ml-platform/terraform/features/nim/README.md b/best-practices/ml-platform/terraform/features/nim/README.md index fb819e930..c1d049fa6 100644 --- a/best-practices/ml-platform/terraform/features/nim/README.md +++ b/best-practices/ml-platform/terraform/features/nim/README.md @@ -1 +1,22 @@ -# Deploy NVIDIA NIMs on GKE \ No newline at end of file +# Deploy NVIDIA NIMs on GKE + +## Before you begin + +## How to use this repository + +1. 
Clone the repository and change directory to the guide directory + + ```bash + git clone https://github.com/GoogleCloudPlatform/ai-on-gke && \ + cd ai-on-gke/best-practices/ml-platform/terraform/features/nim + ``` + +## Setup variables + +## Deploy the NIM with the Helm chart + + ```bash + terraform init && terraform apply + ``` + +## Test the NIM \ No newline at end of file diff --git a/best-practices/ml-platform/terraform/features/nim/main.tf b/best-practices/ml-platform/terraform/features/nim/main.tf index 3c6ea0fcf..f5c0cb97f 100644 --- a/best-practices/ml-platform/terraform/features/nim/main.tf +++ b/best-practices/ml-platform/terraform/features/nim/main.tf @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ data "google_container_cluster" "nim_llm" { name = var.cluster_name location = var.cluster_location @@ -7,6 +21,17 @@ data "google_client_config" "current" { } +# Read the NIM Service from the namespace the Helm release is installed into +# (not "default"), and only after the release has been applied. +data "kubernetes_service" "nim_svc" { + metadata { + name = "nim-nim-llm" + namespace = kubernetes_namespace.nim.metadata.0.name + } + + depends_on = [helm_release.nim_release] +} + resource "kubernetes_namespace" "nim" { metadata { name = var.kubernetes_namespace @@ -51,30 +76,14 @@ resource "kubernetes_secret" "ngc_api" { depends_on = [kubernetes_namespace.nim] } -resource "kubernetes_storage_class" "name" { - metadata { - name = "hyperdisk-ml" - } - - parameters = { - type = "hyperdisk-ml" - } - - storage_provisioner = "pd.csi.storage.gke.io" - allow_volume_expansion = false - reclaim_policy = "Delete" - volume_binding_mode = "WaitForFirstConsumer" -} - -resource "kubernetes_persistent_volume_claim" "name" { +resource "kubernetes_persistent_volume_claim" "pvc_nim" { metadata { generate_name = "pvc-nim-" namespace = kubernetes_namespace.nim.metadata.0.name } spec { - access_modes = ["ReadWriteOnce"] - storage_class_name = kubernetes_storage_class.name.metadata.0.name + access_modes = ["ReadWriteOnce"] resources { requests = { storage = "100Gi" @@ -123,7 +132,7 @@ resource "helm_release" "nim_release" { set { name = "persistence.existingClaim" - value = kubernetes_persistent_volume_claim.name.metadata.0.name + value = kubernetes_persistent_volume_claim.pvc_nim.metadata.0.name } depends_on = [kubernetes_namespace.nim] diff --git a/best-practices/ml-platform/terraform/features/nim/outputs.tf b/best-practices/ml-platform/terraform/features/nim/outputs.tf index 51c4ed400..9cbac28b2 100644 --- a/best-practices/ml-platform/terraform/features/nim/outputs.tf +++ b/best-practices/ml-platform/terraform/features/nim/outputs.tf @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + output "inference_url" { - value = "" -} \ No newline at end of file + value = data.kubernetes_service.nim_svc.status.0.load_balancer.0.ingress[0].ip +} diff --git a/best-practices/ml-platform/terraform/features/nim/providers.tf b/best-practices/ml-platform/terraform/features/nim/providers.tf index c2ba332b3..eb7620d52 100644 --- a/best-practices/ml-platform/terraform/features/nim/providers.tf +++ b/best-practices/ml-platform/terraform/features/nim/providers.tf @@ -1,18 +1,38 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +locals { + host = "https://${data.google_container_cluster.nim_llm.endpoint}" + cluster_ca_certificate = base64decode(data.google_container_cluster.nim_llm.master_auth.0.cluster_ca_certificate) + token = data.google_client_config.current.access_token +} + provider "google" { project = var.google_project } provider "kubernetes" { - host = "https://${data.google_container_cluster.nim-llm.endpoint}" - cluster_ca_certificate = base64decode(data.google_container_cluster.nim-llm.master_auth.0.cluster_ca_certificate) - token = data.google_client_config.current.access_token + host = local.host + cluster_ca_certificate = local.cluster_ca_certificate + token = local.token } provider "helm" { kubernetes { - host = "https://${data.google_container_cluster.nim-llm.endpoint}" - cluster_ca_certificate = base64decode(data.google_container_cluster.nim-llm.master_auth.0.cluster_ca_certificate) - token = data.google_client_config.current.access_token + host = local.host + cluster_ca_certificate = local.cluster_ca_certificate + token = local.token } } diff --git a/best-practices/ml-platform/terraform/features/nim/variables.tf b/best-practices/ml-platform/terraform/features/nim/variables.tf index 505af9331..e07a885a4 100644 --- a/best-practices/ml-platform/terraform/features/nim/variables.tf +++ b/best-practices/ml-platform/terraform/features/nim/variables.tf @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ variable "cluster_name" { description = "The name of the cluster NIM will be deployed to" type = string diff --git a/best-practices/ml-platform/terraform/features/nim/versions.tf b/best-practices/ml-platform/terraform/features/nim/versions.tf new file mode 100644 index 000000000..a6c337a11 --- /dev/null +++ b/best-practices/ml-platform/terraform/features/nim/versions.tf @@ -0,0 +1,30 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "6.2.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "2.32.0" + } + helm = { + source = "hashicorp/helm" + version = "2.15.0" + } + } +}