diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf index f9bbf537f..c80f39791 100644 --- a/applications/rag/variables.tf +++ b/applications/rag/variables.tf @@ -398,7 +398,7 @@ variable "gpu_pools" { name = "gpu-pool-l4" machine_type = "g2-standard-24" autoscaling = true - min_count = 1 + min_count = 0 max_count = 3 disk_size_gb = 200 disk_type = "pd-balanced" diff --git a/applications/ray/variables.tf b/applications/ray/variables.tf index 63d20c0cf..7b760959b 100644 --- a/applications/ray/variables.tf +++ b/applications/ray/variables.tf @@ -172,7 +172,7 @@ variable "gpu_pools" { name = "gpu-pool-l4" machine_type = "g2-standard-24" autoscaling = true - min_count = 1 + min_count = 0 max_count = 3 disk_size_gb = 100 disk_type = "pd-balanced" diff --git a/modules/kuberay-cluster/values.yaml b/modules/kuberay-cluster/values.yaml index cec35e35f..a1028fd0a 100644 --- a/modules/kuberay-cluster/values.yaml +++ b/modules/kuberay-cluster/values.yaml @@ -36,7 +36,7 @@ head: # If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod. # Ray autoscaler integration is supported only for Ray versions >= 1.11.0 # Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0. - # enableInTreeAutoscaling: true + enableInTreeAutoscaling: true # autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler. # The example configuration shown below below represents the DEFAULT values. # autoscalerOptions: @@ -95,17 +95,17 @@ head: # Ray recommends at least 8G memory for production workloads. memory: "8G" # Sum of ephemeral storage requests must be max 10Gi on Autopilot default class. - # This includes, ray-head, gcsfuse-sidecar, and fluent-bit. - ephemeral-storage: 4Gi + # This includes ray-head, gcsfuse-sidecar, fluent-bit, and the Ray autoscaler sidecar, which requests 1Gi by default.
+ ephemeral-storage: 3Gi requests: cpu: "4" memory: "8G" - ephemeral-storage: 4Gi + ephemeral-storage: 3Gi annotations: gke-gcsfuse/volumes: "true" gke-gcsfuse/cpu-limit: "1" gke-gcsfuse/memory-limit: 2Gi - gke-gcsfuse/ephemeral-storage-limit: 4Gi + gke-gcsfuse/ephemeral-storage-limit: 3Gi nodeSelector: iam.gke.io/gke-metadata-server-enabled: "true" tolerations: [] @@ -165,7 +165,9 @@ worker: # uncomment the line below # disabled: true groupName: workerGroup - replicas: 1 + replicas: 0 + minReplicas: 0 + maxReplicas: 5 type: worker labels: cloud.google.com/gke-ray-node-type: worker