neonvm: add readiness probe for sysfs scaling #1190

Draft · wants to merge 1 commit into base: main
12 changes: 10 additions & 2 deletions neonvm-runner/cmd/httpserver.go
@@ -19,8 +19,9 @@ import (
)

type cpuServerCallbacks struct {
-	get func(*zap.Logger) (*vmv1.MilliCPU, error)
-	set func(*zap.Logger, vmv1.MilliCPU) error
+	get   func(*zap.Logger) (*vmv1.MilliCPU, error)
+	set   func(*zap.Logger, vmv1.MilliCPU) error
+	ready func(*zap.Logger) bool
}

func listenForHTTPRequests(
@@ -42,6 +43,13 @@ func listenForHTTPRequests(
mux.HandleFunc("/cpu_current", func(w http.ResponseWriter, r *http.Request) {
handleCPUCurrent(cpuCurrentLogger, w, r, callbacks.get)
})
mux.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) {
if callbacks.ready(logger) {
w.WriteHeader(200)
} else {
w.WriteHeader(500)
}
})
if networkMonitoring {
reg := prometheus.NewRegistry()
metrics := NewMonitoringMetrics(reg)
38 changes: 38 additions & 0 deletions neonvm-runner/cmd/main.go
@@ -497,6 +497,20 @@ func runQEMU(
lastValue.Store(uint32(cpu))
return nil
},
ready: func(logger *zap.Logger) bool {
// if we are in sysfs mode, we need to check if the NeonVM Daemon is ready
if cfg.cpuScalingMode == vmv1.CpuScalingModeSysfs {
err := checkNeonvmDaemonCPU()
if err != nil {
logger.Warn("neonvm-daemon ready probe failed", zap.Error(err))
return false
}
return true
}
// do nothing for QMP mode

return false
},
}

wg.Add(1)
@@ -753,3 +767,27 @@ func setNeonvmDaemonCPU(cpu vmv1.MilliCPU) error {

return nil
}

// checkNeonvmDaemonCPU sends a GET request to the NeonVM Daemon to fetch the current CPU limit, for the sake of the readiness probe.
func checkNeonvmDaemonCPU() error {
_, vmIP, _, err := calcIPs(defaultNetworkCIDR)
if err != nil {
return fmt.Errorf("could not calculate VM IP address: %w", err)
}
ctx, cancel := context.WithTimeout(context.TODO(), time.Second)
defer cancel()
url := fmt.Sprintf("http://%s:25183/cpu", vmIP)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return fmt.Errorf("could not build request: %w", err)
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("could not send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("neonvm-daemon responded with status %d", resp.StatusCode)
}
return nil
}
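
For context, a minimal standalone sketch (not part of this diff) of what the httpGet readiness probe added in pkg/neonvm/controllers/vm_controller.go below boils down to: a plain HTTP GET against the runner's /ready endpoint, where any 2xx/3xx status counts as ready. The host and port used here are placeholders, not values from this PR.

```go
// Minimal sketch, assuming kubelet httpGet probe semantics (any status in
// [200, 400) counts as success); host and port below are placeholders.
package main

import (
	"fmt"
	"net/http"
	"time"
)

func isRunnerReady(host string, port int) bool {
	client := &http.Client{Timeout: time.Second}
	resp, err := client.Get(fmt.Sprintf("http://%s:%d/ready", host, port))
	if err != nil {
		// transport error or timeout: not ready
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode >= 200 && resp.StatusCode < 400
}

func main() {
	fmt.Println(isRunnerReady("127.0.0.1", 8080)) // placeholder values
}
```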
1 change: 1 addition & 0 deletions neonvm/samples/vm-example.yaml
@@ -29,6 +29,7 @@ kind: VirtualMachine
metadata:
name: example
spec:
cpuScalingMode: SysfsScaling
guest:
cpus:
min: 1
53 changes: 52 additions & 1 deletion pkg/neonvm/controllers/vm_controller.go
@@ -47,6 +47,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/client-go/tools/record"

@@ -864,12 +865,45 @@ func runnerStatus(pod *corev1.Pod) runnerStatusKind {
case corev1.PodFailed:
return runnerFailed
case corev1.PodRunning:
-	return runnerRunning
+	return isRunnerPodReady(pod)
default:
panic(fmt.Errorf("unknown pod phase: %q", pod.Status.Phase))
}
}

// isRunnerPodReady reports whether the runner pod is ready, respecting the readiness probes of its containers.
func isRunnerPodReady(pod *corev1.Pod) runnerStatusKind {
// if the pod has no container statuses, we consider it pending
if pod.Status.ContainerStatuses == nil {
Member:
question: Is there a difference between == nil vs len(...) == 0? I was a little surprised to see this check on a slice.

Contributor Author:
You're right, I don't think there is any difference for a slice whether we use == nil or len(...) == 0.

Contributor:
nil slices and non-nil empty slices are two different things: https://go.dev/play/p/UtIWK670wxW
I always use len(...) == 0 to cover both cases.

Contributor Author:
I usually tend to use != nil or len(slice) depending on the semantics of the particular code (for example, whether I care about the length or just emptiness), but I am OK with using any option here.
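
A minimal standalone illustration of the distinction discussed above (not part of this PR's diff):

```go
package main

import "fmt"

func main() {
	var nilSlice []int    // nil slice: no backing array allocated
	emptySlice := []int{} // non-nil slice with zero length

	fmt.Println(nilSlice == nil, emptySlice == nil) // true false
	fmt.Println(len(nilSlice), len(emptySlice))     // 0 0

	// len(...) == 0 is true for both, so it covers both cases,
	// while == nil distinguishes them.
}
```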

return runnerPending
}

// First we check whether the pod is a migration target pod. In that case we
// don't wait for the readiness signal from neonvm-daemon, because QEMU is
// started in incoming-migration mode.
for _, containerSpec := range pod.Spec.Containers {
if containerSpec.Name == "neonvm-runner" {
for _, env := range containerSpec.Env {
if env.Name == "RECEIVE_MIGRATION" && env.Value == "true" {
return runnerRunning
}
}
}
}

// if the pod is not a migration target pod, we check the readiness
// of the neonvm-runner container
for _, c := range pod.Status.ContainerStatuses {
// we only care about the neonvm-runner container
if c.Name == "neonvm-runner" && !c.Ready {
return runnerPending
}
}
return runnerRunning
}

// deleteRunnerPodIfEnabled deletes the runner pod if buildtag.NeverDeleteRunnerPods is false, and
// then emits an event and log line about what it did, whether it actually deleted the runner pod.
func (r *VMReconciler) deleteRunnerPodIfEnabled(
@@ -1178,6 +1212,10 @@ func imageForVmRunner() (string, error) {
return image, nil
}

// podSpec returns a VirtualMachine Pod object.
// withReadinessProbe - if true, adds a readiness probe to the container.
// We don't need a readiness probe for the VM runner pod if it is a migration
// target pod, because the VM is not started until the migration is complete.
func podSpec(
vm *vmv1.VirtualMachine,
sshSecret *corev1.Secret,
@@ -1371,6 +1409,19 @@ func podSpec(
},
}

pod.Spec.Containers[0].ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/ready",
Port: intstr.FromInt32(vm.Spec.RunnerPort),
Scheme: corev1.URISchemeHTTP,
},
},
InitialDelaySeconds: 5,
PeriodSeconds: 5,
FailureThreshold: 3,
}

if sshSecret != nil {
pod.Spec.Containers[0].VolumeMounts = append(pod.Spec.Containers[0].VolumeMounts,
corev1.VolumeMount{
1 change: 1 addition & 0 deletions pkg/neonvm/controllers/vmmigration_controller.go
@@ -713,6 +713,7 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine(
pod.Name = migration.Status.TargetPodName

// add env variable to turn on migration receiver
// TODO: set it to false or empty after the migration is done, so that the readiness probe behaves correctly
pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{Name: "RECEIVE_MIGRATION", Value: "true"})

// add podAntiAffinity to schedule target pod to another k8s node
2 changes: 1 addition & 1 deletion vm-builder/files/inittab
@@ -1,13 +1,13 @@
::sysinit:/neonvm/bin/vminit
::once:/neonvm/bin/touch /neonvm/vmstart.allowed
-::respawn:/neonvm/bin/neonvmd --addr=0.0.0.0:25183
::respawn:/neonvm/bin/udhcpc -t 1 -T 1 -A 1 -f -i eth0 -O 121 -O 119 -s /neonvm/bin/udhcpc.script
::respawn:/neonvm/bin/udevd
::wait:/neonvm/bin/udev-init.sh
::respawn:/neonvm/bin/acpid -f -c /neonvm/acpi
::respawn:/neonvm/bin/vector -c /neonvm/config/vector.yaml --config-dir /etc/vector --color never
::respawn:/neonvm/bin/chronyd -n -f /neonvm/config/chrony.conf -l /var/log/chrony/chrony.log
::respawn:/neonvm/bin/sshd -E /var/log/ssh.log -f /neonvm/config/sshd_config
+::respawn:/neonvm/bin/neonvmd --addr=0.0.0.0:25183
Member:
question: Why this change?

Contributor Author:
Oh, I thought I covered it in the PR description.
We rely on neonvmd as a source of info for the readiness check.
For example, if we start neonvmd first in the VM, it might be that neonvmd is ready and serving requests while acpid or udevd have not even started yet.

Member:
IIUC then, the idea behind moving neonvmd down is to give the other daemons a chance to start first, right?

Is that actually guaranteed?

::respawn:/neonvm/bin/vmstart
{{ range .InittabCommands }}
::{{.SysvInitAction}}:su -p {{.CommandUser}} -c {{.ShellEscapedCommand}}