Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Daemon redesign #788

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ RUN make _build-manager BIN_PATH=build/_output/cmd
RUN make _build-sriov-network-operator-config-cleanup BIN_PATH=build/_output/cmd

FROM quay.io/centos/centos:stream9
RUN yum -y install delve procps-ng
COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/manager /usr/bin/sriov-network-operator
COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-operator-config-cleanup /usr/bin/sriov-network-operator-config-cleanup
COPY bindata /bindata
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.sriov-network-config-daemon
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ FROM quay.io/centos/centos:stream9
ARG MSTFLINT=mstflint
# We have to ensure that pciutils is installed. This package is needed for mstfwreset to succeed.
# xref pkg/vendors/mellanox/mellanox.go#L150
RUN ARCH_DEP_PKGS=$(if [ "$(uname -m)" != "s390x" ]; then echo -n ${MSTFLINT} ; fi) && yum -y install hwdata pciutils $ARCH_DEP_PKGS && yum clean all
RUN ARCH_DEP_PKGS=$(if [ "$(uname -m)" != "s390x" ]; then echo -n ${MSTFLINT} ; fi) && yum -y install delve procps-ng hwdata pciutils $ARCH_DEP_PKGS && yum clean all
LABEL io.k8s.display-name="sriov-network-config-daemon" \
io.k8s.description="This is a daemon that manage and config sriov network devices in Kubernetes cluster"
COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-config-daemon /usr/bin/
Expand Down
16 changes: 14 additions & 2 deletions bindata/manifests/daemon/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,24 @@ spec:
containers:
- name: sriov-network-config-daemon
image: {{.Image}}
command:
- sriov-network-config-daemon
imagePullPolicy: Always
securityContext:
privileged: true
command:
- sriov-network-config-daemon
args:
- "start"
# command:
# - dlv
# - --listen=:2345
# - --headless=true
# - --api-version=2
# - --accept-multiclient
# - --log
# - exec
# - /usr/bin/sriov-network-config-daemon
# - --
# - start
{{- if .UsedSystemdMode}}
- --use-systemd-service
{{- end }}
Expand Down
1 change: 1 addition & 0 deletions bindata/manifests/operator-webhook/server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ spec:
containers:
- name: webhook-server
image: {{.SriovNetworkWebhookImage}}
imagePullPolicy: Always
command:
- webhook
args:
Expand Down
1 change: 1 addition & 0 deletions bindata/manifests/plugins/sriov-device-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ spec:
containers:
- name: sriov-device-plugin
image: {{.SRIOVDevicePluginImage}}
imagePullPolicy: Always
args:
- --log-level=10
- --resource-prefix={{.ResourcePrefix}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ contents: |

[Service]
Type=oneshot
ExecStart=/var/lib/sriov/sriov-network-config-daemon -v 2 --zap-log-level 2 service --phase post
ExecStart=/var/lib/sriov/sriov-network-config-daemon service --phase post
StandardOutput=journal+console

[Install]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ contents: |

[Service]
Type=oneshot
ExecStart=/var/lib/sriov/sriov-network-config-daemon -v 2 --zap-log-level 2 service --phase pre
ExecStart=/var/lib/sriov/sriov-network-config-daemon service --phase pre
StandardOutput=journal+console

[Install]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:

[Service]
Type=oneshot
ExecStart=/var/lib/sriov/sriov-network-config-daemon service -v {{ .LogLevel }} --zap-log-level {{ .LogLevel }} --phase pre
ExecStart=/var/lib/sriov/sriov-network-config-daemon service --phase pre
StandardOutput=journal+console

[Install]
Expand All @@ -38,7 +38,7 @@ spec:

[Service]
Type=oneshot
ExecStart=/var/lib/sriov/sriov-network-config-daemon service -v {{ .LogLevel }} --zap-log-level {{ .LogLevel }} --phase post
ExecStart=/var/lib/sriov/sriov-network-config-daemon service --phase post
StandardOutput=journal+console

[Install]
Expand Down
1 change: 1 addition & 0 deletions bindata/manifests/webhook/server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ spec:
containers:
- name: webhook-server
image: {{.NetworkResourcesInjectorImage}}
imagePullPolicy: Always
command:
- webhook
args:
Expand Down
17 changes: 15 additions & 2 deletions cmd/sriov-network-config-daemon/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ func runServiceCmd(cmd *cobra.Command, args []string) error {
}
// init logger
snolog.InitLog()
snolog.SetLogLevel(2)
setupLog := log.Log.WithName("sriov-config-service").WithValues("phase", phaseArg)

setupLog.V(0).Info("Starting sriov-config-service", "version", version.Version)
Expand Down Expand Up @@ -163,12 +164,24 @@ func phasePre(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helpe
hostHelpers.TryEnableTun()
hostHelpers.TryEnableVhostNet()

return callPlugin(setupLog, PhasePre, conf, hostHelpers)
// Retry here because sometimes the kernel is not fully loaded yet, and we see errors like
// unbindDriver(): failed to unbind driver "error": "open /sys/bus/pci/drivers/igbvf/unbind: permission denied"
i := 0
for i < 5 {
err = callPlugin(setupLog, PhasePre, conf, hostHelpers)
if err == nil {
break
}
i++
time.Sleep(time.Second)
}

return err
}

func phasePost(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helper.HostHelpersInterface) error {
setupLog.V(0).Info("check result of the Pre phase")
prePhaseResult, err := systemd.ReadSriovResult()
prePhaseResult, _, err := systemd.ReadSriovResult()
if err != nil {
return fmt.Errorf("failed to read result of the pre phase: %v", err)
}
Expand Down
6 changes: 3 additions & 3 deletions cmd/sriov-network-config-daemon/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ var _ = Describe("Service", func() {
hostHelpers.EXPECT().TryEnableVhostNet().Return()
hostHelpers.EXPECT().DiscoverSriovDevices(hostHelpers).Return([]sriovnetworkv1.InterfaceExt{{
Name: "enp216s0f0np0",
}}, nil)
genericPlugin.EXPECT().OnNodeStateChange(newNodeStateContainsDeviceMatcher("enp216s0f0np0")).Return(true, false, nil)
genericPlugin.EXPECT().Apply().Return(testError)
}}, nil).Times(5)
genericPlugin.EXPECT().OnNodeStateChange(newNodeStateContainsDeviceMatcher("enp216s0f0np0")).Return(true, false, nil).Times(5)
genericPlugin.EXPECT().Apply().Return(testError).Times(5)

Expect(runServiceCmd(&cobra.Command{}, []string{})).To(MatchError(ContainSubstring("test")))

Expand Down
Loading
Loading