diff --git a/best-practices/ml-platform/terraform/features/nim/README.md b/best-practices/ml-platform/terraform/features/nim/README.md
new file mode 100644
index 000000000..c1d049fa6
--- /dev/null
+++ b/best-practices/ml-platform/terraform/features/nim/README.md
@@ -0,0 +1,22 @@
+# Deploy NVIDIA NIMs on GKE
+
+## Before you begin
+
+## How to use this repository
+
+1. Clone the repository and change directory to the guide directory
+
+   ```bash
+   git clone https://github.com/GoogleCloudPlatform/ai-on-gke && \
+   cd ai-on-gke/best-practices/ml-platform/terraform/features/nim
+   ```
+
+## Setup variables
+
+## Deploy the NIM with the Helm chart
+
+   ```bash
+   terraform init && terraform apply
+   ```
+
+## Test the NIM
\ No newline at end of file
diff --git a/best-practices/ml-platform/terraform/features/nim/main.tf b/best-practices/ml-platform/terraform/features/nim/main.tf
new file mode 100644
index 000000000..f5c0cb97f
--- /dev/null
+++ b/best-practices/ml-platform/terraform/features/nim/main.tf
@@ -0,0 +1,151 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# GKE cluster that hosts the NIM; its endpoint and CA certificate configure the
+# kubernetes and helm providers in providers.tf.
+data "google_container_cluster" "nim_llm" {
+  name     = var.cluster_name
+  location = var.cluster_location
+}
+
+# Caller credentials; the access token is used to authenticate to the cluster.
+data "google_client_config" "current" {}
+
+# Service created by the Helm release, read back to expose its external IP.
+# It must be looked up in the release namespace (the provider otherwise falls
+# back to "default") and only after the release exists.
+data "kubernetes_service" "nim_svc" {
+  metadata {
+    # presumably "<release name>-<chart name>" as generated by the chart - verify
+    name      = "nim-nim-llm"
+    namespace = kubernetes_namespace.nim.metadata[0].name
+  }
+
+  depends_on = [helm_release.nim_release]
+}
+
+# Namespace that holds every NIM object created by this module.
+resource "kubernetes_namespace" "nim" {
+  metadata {
+    name = var.kubernetes_namespace
+  }
+}
+
+# Image pull secret for nvcr.io, built from the NGC API key.
+resource "kubernetes_secret" "ngc_secret" {
+  metadata {
+    name      = "ngc-secret"
+    namespace = kubernetes_namespace.nim.metadata[0].name
+  }
+
+  type = "kubernetes.io/dockerconfigjson"
+
+  data = {
+    ".dockerconfigjson" = jsonencode({
+      "auths" = {
+        "nvcr.io" = {
+          "username" = "$oauthtoken"
+          "password" = var.ngc_api_key
+          "auth"     = base64encode("$oauthtoken:${var.ngc_api_key}")
+        }
+      }
+    })
+  }
+}
+
+# NGC API key stored as an Opaque secret under the key NGC_API_KEY.
+resource "kubernetes_secret" "ngc_api" {
+  metadata {
+    name      = "ngc-api"
+    namespace = kubernetes_namespace.nim.metadata[0].name
+  }
+
+  type = "Opaque"
+
+  data = {
+    NGC_API_KEY = var.ngc_api_key
+  }
+}
+
+# Volume claim handed to the chart through persistence.existingClaim.
+resource "kubernetes_persistent_volume_claim" "pvc_nim" {
+  metadata {
+    generate_name = "pvc-nim-"
+    namespace     = kubernetes_namespace.nim.metadata[0].name
+  }
+
+  spec {
+    access_modes = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "100Gi"
+      }
+    }
+  }
+
+  # Do not block on binding; presumably the claim only binds once a pod uses it - verify
+  wait_until_bound = false
+}
+
+# NIM LLM Helm chart, fetched as a versioned archive from helm.ngc.nvidia.com.
+resource "helm_release" "nim_release" {
+  name                = "nim"
+  namespace           = kubernetes_namespace.nim.metadata[0].name
+  chart               = "https://helm.ngc.nvidia.com/nim/charts/nim-llm-${var.chart_version}.tgz"
+  repository_username = "$oauthtoken"
+  repository_password = var.ngc_api_key
+
+  set {
+    name  = "image.repository"
+    value = "nvcr.io/nim/${var.image_name}"
+  }
+
+  set {
+    name  = "image.tag"
+    value = var.image_tag
+  }
+
+  set {
+    name  = "model.name"
+    value = var.image_name
+  }
+
+  set {
+    name  = "service.type"
+    value = "LoadBalancer"
+  }
+
+  set {
+    # The dot in "nvidia.com" is escaped so Helm does not treat it as a key separator.
+    name  = "resources.limits.nvidia\\.com/gpu"
+    value = var.gpu_limits
+  }
+
+  set {
+    name  = "persistence.enabled"
+    value = true
+  }
+
+  set {
+    name  = "persistence.existingClaim"
+    value = kubernetes_persistent_volume_claim.pvc_nim.metadata[0].name
+  }
+
+  # The namespace is already an implicit dependency; the secrets are not referenced
+  # above, so order them explicitly (presumably the chart uses them by name - verify).
+  depends_on = [
+    kubernetes_secret.ngc_secret,
+    kubernetes_secret.ngc_api,
+  ]
+}
diff --git a/best-practices/ml-platform/terraform/features/nim/outputs.tf b/best-practices/ml-platform/terraform/features/nim/outputs.tf
new file mode 100644
index 000000000..9cbac28b2
--- /dev/null
+++ b/best-practices/ml-platform/terraform/features/nim/outputs.tf
@@ -0,0 +1,19 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Despite its name, this output is a bare IP address (no scheme or port).
+output "inference_url" {
+  description = "External IP address of the NIM LoadBalancer service"
+  value       = data.kubernetes_service.nim_svc.status[0].load_balancer[0].ingress[0].ip
+}
diff --git a/best-practices/ml-platform/terraform/features/nim/providers.tf b/best-practices/ml-platform/terraform/features/nim/providers.tf
new file mode 100644
index 000000000..eb7620d52
--- /dev/null
+++ b/best-practices/ml-platform/terraform/features/nim/providers.tf
@@ -0,0 +1,38 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+locals {
+  host                   = "https://${data.google_container_cluster.nim_llm.endpoint}" # cluster endpoint turned into an HTTPS URL
+  cluster_ca_certificate = base64decode(data.google_container_cluster.nim_llm.master_auth.0.cluster_ca_certificate) # the data source value is base64-decoded here
+  token                  = data.google_client_config.current.access_token # access token read from the google_client_config data source
+}
+
+provider "google" {
+  project = var.google_project
+}
+
+provider "kubernetes" { # uses the same three connection locals as the helm provider below
+  host                   = local.host
+  cluster_ca_certificate = local.cluster_ca_certificate
+  token                  = local.token
+}
+
+provider "helm" {
+  kubernetes { # connection settings for the cluster the chart is installed into
+    host                   = local.host
+    cluster_ca_certificate = local.cluster_ca_certificate
+    token                  = local.token
+  }
+}
+
diff --git a/best-practices/ml-platform/terraform/features/nim/variables.tf b/best-practices/ml-platform/terraform/features/nim/variables.tf
new file mode 100644
index 000000000..e07a885a4
--- /dev/null
+++ b/best-practices/ml-platform/terraform/features/nim/variables.tf
@@ -0,0 +1,64 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+variable "cluster_name" {
+  description = "The name of the cluster NIM will be deployed to"
+  type        = string
+}
+
+variable "cluster_location" {
+  description = "The location of the cluster NIM will be deployed to"
+  type        = string
+}
+
+variable "google_project" {
+  description = "The Google Cloud project that contains the cluster NIM will be deployed to"
+  type        = string
+}
+
+variable "kubernetes_namespace" {
+  description = "The namespace where NIM will be deployed"
+  default     = "nim"
+  type        = string
+}
+
+variable "gpu_limits" {
+  description = "Number of GPUs that will be presented to the model"
+  default     = 1
+  type        = number
+}
+
+variable "ngc_api_key" {
+  description = "Your NGC API key"
+  type        = string
+  sensitive   = true
+}
+
+variable "chart_version" {
+  description = "The version of the chart"
+  default     = "1.1.2"
+  type        = string
+}
+
+variable "image_name" {
+  description = "The name of the image to be deployed by NIM. Should be <publisher>/<model>, as in the default value"
+  default     = "meta/llama3-8b-instruct"
+  type        = string
+}
+
+variable "image_tag" {
+  description = "The tag of the image to be deployed by NIM"
+  default     = "1.0.0"
+  type        = string
+}
diff --git a/best-practices/ml-platform/terraform/features/nim/versions.tf b/best-practices/ml-platform/terraform/features/nim/versions.tf
new file mode 100644
index 000000000..a6c337a11
--- /dev/null
+++ b/best-practices/ml-platform/terraform/features/nim/versions.tf
@@ -0,0 +1,30 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+terraform {
+  required_providers { # every provider below is pinned to one exact version
+    google = {
+      source  = "hashicorp/google"
+      version = "6.2.0"
+    }
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "2.32.0"
+    }
+    helm = {
+      source  = "hashicorp/helm"
+      version = "2.15.0" # NOTE(review): the nested set {} / kubernetes {} block syntax used by this module presumably needs a 2.x helm provider - confirm before upgrading
+    }
+  }
+}