From 7c289d7db0ba2d3fc0cd7f9db4575696011d6256 Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 11 Nov 2024 16:44:28 -0500 Subject: [PATCH] bugfix: proper handling of values set to 0 (#97) `if .Values.X` evaluates to false if X is defined to be 0. This resulted in retryLimit and terminationGracePeriodSeconds not being properly propagated through the the generated yaml if the user sets their value to 0 (as opposed to nil). --- .../chart/templates/_helpers.tpl | 2 +- .../chart/templates/appwrapper.yaml | 2 +- .../chart/tests/helloworld_test.yaml | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/pytorchjob-generator/chart/templates/_helpers.tpl b/tools/pytorchjob-generator/chart/templates/_helpers.tpl index d4aa42e..f4e4fd4 100644 --- a/tools/pytorchjob-generator/chart/templates/_helpers.tpl +++ b/tools/pytorchjob-generator/chart/templates/_helpers.tpl @@ -29,7 +29,7 @@ annotations: {{- define "mlbatch.schedulingSpec" }} -{{- if .Values.terminationGracePeriodSeconds }} +{{- if ne .Values.terminationGracePeriodSeconds nil }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} {{- end }} {{- if .Values.bypassCoscheduler }} diff --git a/tools/pytorchjob-generator/chart/templates/appwrapper.yaml b/tools/pytorchjob-generator/chart/templates/appwrapper.yaml index 91420fc..6b8543c 100644 --- a/tools/pytorchjob-generator/chart/templates/appwrapper.yaml +++ b/tools/pytorchjob-generator/chart/templates/appwrapper.yaml @@ -67,7 +67,7 @@ metadata: {{- if .Values.retryPausePeriodDuration }} workload.codeflare.dev.appwrapper/retryPausePeriodDuration: "{{ .Values.retryPausePeriodDuration }}" {{- end }} - {{- if .Values.retryLimit }} + {{- if ne .Values.retryLimit nil }} workload.codeflare.dev.appwrapper/retryLimit: "{{ .Values.retryLimit }}" {{- end }} {{- if .Values.forcefulDeletionGracePeriodDuration }} diff --git a/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml b/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml index 976afbf..89ae562 100644 --- a/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml +++ b/tools/pytorchjob-generator/chart/tests/helloworld_test.yaml @@ -164,6 +164,22 @@ tests: workload.codeflare.dev.appwrapper/forcefulDeletionGracePeriodDuration: "19s" workload.codeflare.dev.appwrapper/deletionOnFailureGracePeriodDuration: "2s" +- it: Setting integer fault tolerance annotation to 0 + set: + retryLimit: 0 + terminationGracePeriodSeconds: 0 + asserts: + - isSubset: + path: metadata.annotations + content: + workload.codeflare.dev.appwrapper/retryLimit: "0" + - equal: + path: spec.components[0].template.spec.pytorchReplicaSpecs.Master.template.spec.terminationGracePeriodSeconds + value: 0 + - equal: + path: spec.components[0].template.spec.pytorchReplicaSpecs.Worker.template.spec.terminationGracePeriodSeconds + value: 0 + - it: Setting just one tolerance annotation set: deletionOnFailureGracePeriodDuration: "6h"