-
Outline

As a human you are able to take this into account while you perform In addition to this, it also depends on the people operating (or rather watching) over the This is why we need some knobs (but not too many) to be able to offer users the automated behaviour they are after for their precious release.

Proposal

The idea is to add a

Spec changes

Remediation

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
--- a/docs/spec/v2alpha1/helmreleases.md
+++ b/docs/spec/v2alpha1/helmreleases.md
@@ -96,6 +96,21 @@ type HelmChartTemplate struct {
Interval *metav1.Duration `json:"interval,omitempty"`
}
+type Remediation struct {
+ // Retries is the number of retries that should be attempted on failures before
+ // bailing. Defaults to '0'; a negative integer means unlimited retries.
+ // +optional
+ Retries int `json:"retries,omitempty"`
+
+ // Strategy to use for the remediation of a failed action.
+ // +kubebuilder:validation:Enum=rollback;uninstall;ignore
+ // +optional
+ Strategy string `json:"strategy,omitempty"`
+
+ // DoNotRemediateOnLastRetry tells the controller to skip remediation when
+ // there are no retries left so the release can be debugged.
+ // +optional
+ DoNotRemediateOnLastRetry bool `json:"doNotRemediateOnLastRetry,omitempty"`
+}

Install

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
// Install holds the configuration for Helm install actions.
type Install struct {
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
@@ -104,6 +119,12 @@ type Install struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
+ // Remediation holds the remediation configuration for when the
+ // Helm install action fails. The default install failure
+ // remediation is an uninstall action.
+ // +optional
+ Remediation Remediation `json:"remediation,omitempty"`
+
// DisableWait disables the waiting for resources to be ready after a
// Helm install has been performed.
// +optional

Upgrade

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
@@ -137,10 +158,11 @@ type Upgrade struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
- // MaxRetries is the number of retries that should be attempted on failures before
- // bailing. Defaults to '0', a negative integer equals to unlimited retries.
+ // Remediation holds the remediation configuration for when the
+ // Helm upgrade action for the HelmRelease fails. The default
+ // upgrade failure remediation is to not perform any action.
// +optional
- MaxRetries int `json:"maxRetries,omitempty"`
+ Remediation Remediation `json:"remediation,omitempty"`
// DisableWait disables the waiting for resources to be ready after a
// Helm upgrade has been performed.

Test

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
@@ -179,6 +201,12 @@ type Test struct {
// +optional
Enable bool `json:"enable,omitempty"`
+ // Remediation holds the remediation configuration for when the
+ // Helm test action for the HelmRelease fails. The default test
+ // failure remediation is to ignore the test result.
+ // +optional
+ Remediation Remediation `json:"remediation,omitempty"`
+
// Timeout is the time to wait for any individual Kubernetes operation
// during the performance of a Helm test action. Defaults to
// 'HelmReleaseSpec.Timeout'.

Rollback

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
@@ -188,11 +216,6 @@ type Test struct {
// Rollback holds the configuration for Helm rollback actions.
type Rollback struct {
- // Enable enables Helm rollback actions for this release after an
- // Helm install or upgrade action failure.
- // +optional
- Enable bool `json:"enable,omitempty"`
-
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
// for hooks) during the performance of a Helm rollback action. Defaults to
// 'HelmReleaseSpec.Timeout'.

Examples

Infinite installation retries, rollback on upgrade failure, ignore test errors:

spec:
  install:
    remediation:
      retries: -1
  upgrade:
    remediation:
      strategy: rollback
  test:
    enable: true
    remediation:
      strategy: ignore

Keep failed installation on last retry, rollback on upgrade failure, rollback on test errors:

spec:
  install:
    remediation:
      retries: 5
      doNotRemediateOnLastRetry: true
  upgrade:
    remediation:
      strategy: rollback
  test:
    enable: true
    remediation:
      strategy: rollback

Uninstall on test and upgrade failures:

spec:
  upgrade:
    remediation:
      strategy: uninstall
  test:
    enable: true
    remediation:
      strategy: uninstall

Notes
|
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 9 replies
-
Given the division into Install/Upgrade/Test stanzas, this is an economical expression of rollback parameters (so easy for a user to encode what they want, with not too much fuss) 👍
That latter part seems counter-intuitive -- surely if tests are enabled, and fail, the release should not be considered ready? |
Beta Was this translation helpful? Give feedback.
-
Thanks for taking up this discussion! I like the uniformity of the
One other small thing, is the reason for the verbosity of

Updating the API sketch from before, these are the fields I see as definitely useful (ignoring #103 for now), with proposed defaults:

spec:
  install:
    retries: 0
    remediateLastRetry: false
  upgrade:
    retries: 0
    remediateLastRetry: false
  test:
    enable: true
    remediateFailures: true
|
Beta Was this translation helpful? Give feedback.
-
To combine the starter post and #102 (reply in thread), and to be able to select it as the answer.

Spec changes

Install

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
--- a/docs/spec/v2alpha1/helmreleases.md
+++ b/docs/spec/v2alpha1/helmreleases.md
@@ -96,6 +96,21 @@ type HelmChartTemplate struct {
Interval *metav1.Duration `json:"interval,omitempty"`
}
+type InstallRemediation struct {
+ // Retries is the number of retries that should be attempted on failures before
+ // bailing. Defaults to '0'; a negative integer means unlimited retries.
+ // +optional
+ Retries int `json:"retries,omitempty"`
+
+ // IgnoreTestFailures tells the controller to skip remediation when
+ // the Helm tests are run after an install action but fail.
+ // Defaults to 'Test.IgnoreTestFailures'.
+ // +optional
+ IgnoreTestFailures bool `json:"ignoreTestFailures,omitempty"`
+
+ // SkipLastRemediation tells the controller to skip remediation when
+ // there are no retries left so the release can be debugged.
+ // +optional
+ SkipLastRemediation bool `json:"skipLastRemediation,omitempty"`
+}

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
// Install holds the configuration for Helm install actions.
type Install struct {
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
@@ -104,6 +119,12 @@ type Install struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
+ // Remediation holds the remediation configuration for when the
+ // Helm install action fails. The default install failure
+ // remediation is an uninstall action.
+ // +optional
+ Remediation InstallRemediation `json:"remediation,omitempty"`
+
// DisableWait disables the waiting for resources to be ready after a
// Helm install has been performed.
// +optional

Upgrade

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
--- a/docs/spec/v2alpha1/helmreleases.md
+++ b/docs/spec/v2alpha1/helmreleases.md
@@ -96,6 +96,21 @@ type HelmChartTemplate struct {
Interval *metav1.Duration `json:"interval,omitempty"`
}
+type UpgradeRemediation struct {
+ // Retries is the number of retries that should be attempted on failures before
+ // bailing. Defaults to '0'; a negative integer means unlimited retries.
+ // +optional
+ Retries int `json:"retries,omitempty"`
+
+ // Strategy to use for the remediation of a failed action.
+ // +kubebuilder:validation:Enum=rollback;uninstall
+ // +optional
+ Strategy string `json:"strategy,omitempty"`
+
+ // IgnoreTestFailures tells the controller to skip remediation when
+ // the Helm tests are run after an upgrade action but fail.
+ // Defaults to 'Test.IgnoreTestFailures'.
+ // +optional
+ IgnoreTestFailures bool `json:"ignoreTestFailures,omitempty"`
+
+ // SkipLastRemediation tells the controller to skip remediation when
+ // there are no retries left so the release can be debugged.
+ // +optional
+ SkipLastRemediation bool `json:"skipLastRemediation,omitempty"`
+}

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
@@ -137,10 +158,11 @@ type Upgrade struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
- // MaxRetries is the number of retries that should be attempted on failures before
- // bailing. Defaults to '0', a negative integer equals to unlimited retries.
+ // Remediation holds the remediation configuration for when the
+ // Helm upgrade action for the HelmRelease fails. The default
+ // upgrade failure remediation is to not perform any action.
// +optional
- MaxRetries int `json:"maxRetries,omitempty"`
+ Remediation UpgradeRemediation `json:"remediation,omitempty"`
// DisableWait disables the waiting for resources to be ready after a
// Helm upgrade has been performed.

Test

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
@@ -179,6 +201,12 @@ type Test struct {
// +optional
Enable bool `json:"enable,omitempty"`
+ // IgnoreTestFailures tells the controller to skip remediation when
+ // the Helm tests are run but fail.
+ // Can be overridden for tests run after install or upgrade actions in
+ // 'Install.Remediation.IgnoreTestFailures' and
+ // 'Upgrade.Remediation.IgnoreTestFailures'.
+ // +optional
+ IgnoreTestFailures bool `json:"ignoreTestFailures,omitempty"`
// Timeout is the time to wait for any individual Kubernetes operation
// during the performance of a Helm test action. Defaults to
// 'HelmReleaseSpec.Timeout'.

Rollback

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md
index bf6010d..127c196 100644
@@ -188,11 +216,6 @@ type Test struct {
// Rollback holds the configuration for Helm rollback actions.
type Rollback struct {
- // Enable enables Helm rollback actions for this release after an
- // Helm install or upgrade action failure.
- // +optional
- Enable bool `json:"enable,omitempty"`
-
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
// for hooks) during the performance of a Helm rollback action. Defaults to
// 'HelmReleaseSpec.Timeout'.

Behavioral notes
|
Beta Was this translation helpful? Give feedback.
To combine the starter post and #102 (reply in thread), and to be able to select it as the answer.
Spec changes
Install