Skip to content

Commit

Permalink
refactor: add StartUpProbe
Browse files Browse the repository at this point in the history
  • Loading branch information
zyy17 committed Oct 25, 2024
1 parent d8d1472 commit f9c3fc7
Show file tree
Hide file tree
Showing 15 changed files with 1,856 additions and 20 deletions.
10 changes: 10 additions & 0 deletions apis/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,16 @@ type MainContainerSpec struct {
// +optional
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`

// StartupProbe indicates that the Pod has successfully initialized.
// If specified, no other probes are executed until this completes successfully.
// If this probe fails, the Pod will be restarted, just as if the livenessProbe failed.
// This can be used to provide different probe parameters at the beginning of a Pod's lifecycle,
// when it might take a long time to load data or warm a cache, than during steady-state operation.
// This cannot be updated.
// More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
// +optional
StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`

// Actions that the management system should take in response to container lifecycle events.
// Cannot be updated.
// Lifecycle field is from `corev1.Container.Lifecycle`.
Expand Down
77 changes: 59 additions & 18 deletions apis/v1alpha1/defaulting.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,9 @@ func (in *GreptimeDBCluster) defaultSpec() *GreptimeDBClusterSpec {
var defaultSpec = &GreptimeDBClusterSpec{
Base: &PodTemplateSpec{
MainContainer: &MainContainerSpec{
// The default liveness probe for the main container of GreptimeDBCluster.
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: DefaultHealthEndpoint,
Port: intstr.FromInt32(DefaultHTTPPort),
},
},
},
StartupProbe: defaultStartupProbe(),
LivenessProbe: defaultLivenessProbe(),
ReadinessProbe: defaultReadinessProbe(),
},
},
Initializer: &InitializerSpec{Image: DefaultInitializerImage},
Expand Down Expand Up @@ -248,7 +242,9 @@ func (in *GreptimeDBCluster) mergeFrontendTemplate() error {
}

// Reconfigure the probe settings based on the HTTP port.
in.Spec.Frontend.Template.MainContainer.StartupProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Frontend.HTTPPort)
in.Spec.Frontend.Template.MainContainer.LivenessProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Frontend.HTTPPort)
in.Spec.Frontend.Template.MainContainer.ReadinessProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Frontend.HTTPPort)
}

return nil
Expand All @@ -262,7 +258,9 @@ func (in *GreptimeDBCluster) mergeMetaTemplate() error {
}

// Reconfigure the probe settings based on the HTTP port.
in.Spec.Meta.Template.MainContainer.StartupProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Meta.HTTPPort)
in.Spec.Meta.Template.MainContainer.LivenessProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Meta.HTTPPort)
in.Spec.Meta.Template.MainContainer.ReadinessProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Meta.HTTPPort)
}

return nil
Expand All @@ -276,7 +274,9 @@ func (in *GreptimeDBCluster) mergeDatanodeTemplate() error {
}

// Reconfigure the probe settings based on the HTTP port.
in.Spec.Datanode.Template.MainContainer.StartupProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Datanode.HTTPPort)
in.Spec.Datanode.Template.MainContainer.LivenessProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Datanode.HTTPPort)
in.Spec.Datanode.Template.MainContainer.ReadinessProbe.HTTPGet.Port = intstr.FromInt32(in.Spec.Datanode.HTTPPort)
}

return nil
Expand All @@ -290,7 +290,9 @@ func (in *GreptimeDBCluster) mergeFlownodeTemplate() error {
}

// TODO(zyy17): The flownode does not need startup, liveness, or readiness probes for now; they will be added in the future.
in.Spec.Flownode.Template.MainContainer.StartupProbe = nil
in.Spec.Flownode.Template.MainContainer.LivenessProbe = nil
in.Spec.Flownode.Template.MainContainer.ReadinessProbe = nil
}

return nil
Expand All @@ -314,15 +316,9 @@ func (in *GreptimeDBStandalone) defaultSpec() *GreptimeDBStandaloneSpec {
var defaultSpec = &GreptimeDBStandaloneSpec{
Base: &PodTemplateSpec{
MainContainer: &MainContainerSpec{
// The default liveness probe for the main container of GreptimeDBStandalone.
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: DefaultHealthEndpoint,
Port: intstr.FromInt32(DefaultHTTPPort),
},
},
},
StartupProbe: defaultStartupProbe(),
LivenessProbe: defaultLivenessProbe(),
ReadinessProbe: defaultReadinessProbe(),
},
},
HTTPPort: DefaultHTTPPort,
Expand Down Expand Up @@ -379,3 +375,48 @@ func getVersionFromImage(imageURL string) string {
}
return DefaultVersion
}

// defaultStartupProbe builds the default startup probe for the main container.
// It issues an HTTP GET against DefaultHealthEndpoint on DefaultHTTPPort.
// Once the startup probe succeeds, the liveness and readiness probes are enabled.
func defaultStartupProbe() *corev1.Probe {
	healthCheck := corev1.ProbeHandler{
		HTTPGet: &corev1.HTTPGetAction{
			Path: DefaultHealthEndpoint,
			Port: intstr.FromInt32(DefaultHTTPPort),
		},
	}

	probe := &corev1.Probe{ProbeHandler: healthCheck}
	probe.PeriodSeconds = 10

	// The database may take a long time to start in some scenarios, so allow
	// up to 30 failures: 30 * 10 = 300 seconds for the container to come up.
	probe.FailureThreshold = 30

	return probe
}

// defaultLivenessProbe builds the default liveness probe for the main container.
// It issues an HTTP GET against DefaultHealthEndpoint on DefaultHTTPPort every
// 5 seconds with a failure threshold of 10.
// A failing liveness probe causes the container to be restarted.
func defaultLivenessProbe() *corev1.Probe {
	var probe corev1.Probe

	probe.HTTPGet = &corev1.HTTPGetAction{
		Path: DefaultHealthEndpoint,
		Port: intstr.FromInt32(DefaultHTTPPort),
	}
	probe.PeriodSeconds = 5
	probe.FailureThreshold = 10

	return &probe
}

// defaultReadinessProbe builds the default readiness probe for the main container.
// It issues an HTTP GET against DefaultHealthEndpoint on DefaultHTTPPort every
// 5 seconds with a failure threshold of 10.
// A failing readiness probe removes the container from the service endpoints.
func defaultReadinessProbe() *corev1.Probe {
	httpGet := &corev1.HTTPGetAction{
		Path: DefaultHealthEndpoint,
		Port: intstr.FromInt32(DefaultHTTPPort),
	}

	readiness := new(corev1.Probe)
	readiness.ProbeHandler = corev1.ProbeHandler{HTTPGet: httpGet}
	readiness.PeriodSeconds = 5
	readiness.FailureThreshold = 10

	return readiness
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,23 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
frontend:
replicas: 1
httpPort: 4000
httpPort: 9000
mysqlPort: 4002
postgreSQLPort: 4003
rpcPort: 4001
Expand All @@ -51,7 +65,21 @@ spec:
livenessProbe:
httpGet:
path: /health
port: 4000
port: 9000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 9000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 9000
periodSeconds: 10
failureThreshold: 30
resources:
limits:
cpu: 100m
Expand Down Expand Up @@ -79,6 +107,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
resources:
limits:
cpu: 50m
Expand Down Expand Up @@ -110,6 +152,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
resources:
limits:
cpu: 100m
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ spec:
memory: 128Mi
frontend:
replicas: 1
httpPort: 9000
template:
main:
image: greptime/greptimedb:test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
frontend:
replicas: 1
httpPort: 4000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
resources:
requests:
cpu: "500m"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
frontend:
replicas: 1
httpPort: 4000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
version: latest
initializer:
image: greptime/greptimedb-initializer:latest
Expand Down Expand Up @@ -75,6 +89,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
datanodeStorage:
dataHome: /data/greptimedb
fs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@ spec:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
readinessProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 5
failureThreshold: 10
startupProbe:
httpGet:
path: /health
port: 4000
periodSeconds: 10
failureThreshold: 30
service:
type: ClusterIP
logging:
Expand Down
5 changes: 5 additions & 0 deletions apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f9c3fc7

Please sign in to comment.