From c25a4f9ffc4ac4693c8396944b2aa0d3155445f9 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Wed, 26 Jun 2024 18:22:59 +0200 Subject: [PATCH 1/9] e2e: Stick to a stable OCP version Signed-off-by: Andrea Panattoni --- hack/run-e2e-conformance-virtual-ocp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index 5e80d3683..6f5d7d72f 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -xeo pipefail -OCP_VERSION=${OCP_VERSION:-4.16.0-rc.2} +OCP_VERSION=${OCP_VERSION:-4.16.0} cluster_name=${CLUSTER_NAME:-ocp-virt} domain_name=lab From f439d47b4c5c06999044fc9f67c4d01a9137a47a Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Tue, 19 Mar 2024 10:22:03 +0200 Subject: [PATCH 2/9] Deploy RDMA CNI as a part of SR-IOV Operator Signed-off-by: Ivan Kolodiazhnyi --- bindata/manifests/daemon/daemonset.yaml | 17 +++++++++++++++++ controllers/sriovoperatorconfig_controller.go | 1 + controllers/suite_test.go | 2 ++ deploy/operator.yaml | 2 ++ .../sriov-network-operator-chart/README.md | 1 + .../templates/operator.yaml | 2 ++ .../sriov-network-operator-chart/values.yaml | 1 + hack/env.sh | 4 ++++ hack/release/chart-update.sh | 2 ++ hack/run-e2e-test.sh | 1 + 10 files changed, 33 insertions(+) diff --git a/bindata/manifests/daemon/daemonset.yaml b/bindata/manifests/daemon/daemonset.yaml index b5126ae28..1c3d7d618 100644 --- a/bindata/manifests/daemon/daemonset.yaml +++ b/bindata/manifests/daemon/daemonset.yaml @@ -92,6 +92,23 @@ spec: mountPath: /host/etc/os-release readOnly: true {{- end }} + {{- if .RDMACNIImage }} + - name: rdma-cni + image: {{.RDMACNIImage}} + args: ["--no-sleep"] + securityContext: + privileged: true + resources: + requests: + cpu: 10m + memory: 10Mi + volumeMounts: + - name: cnibin + mountPath: /host/opt/cni/bin + - name: os-release + mountPath: /host/etc/os-release + 
readOnly: true + {{- end }} {{- if .UsedSystemdMode}} - name: sriov-service-copy image: {{.Image}} diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 6a5088a95..11cfdfc36 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -178,6 +178,7 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, data.Data["SRIOVCNIImage"] = os.Getenv("SRIOV_CNI_IMAGE") data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE") data.Data["OVSCNIImage"] = os.Getenv("OVS_CNI_IMAGE") + data.Data["RDMACNIImage"] = os.Getenv("RDMA_CNI_IMAGE") data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") data.Data["ClusterType"] = vars.ClusterType data.Data["DevMode"] = os.Getenv("DEV_MODE") diff --git a/controllers/suite_test.go b/controllers/suite_test.go index b830e2fc2..3951d1b60 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -115,6 +115,8 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) err = os.Setenv("OVS_CNI_IMAGE", "mock-image") Expect(err).NotTo(HaveOccurred()) + err = os.Setenv("RDMA_CNI_IMAGE", "mock-image") + Expect(err).NotTo(HaveOccurred()) err = os.Setenv("SRIOV_DEVICE_PLUGIN_IMAGE", "mock-image") Expect(err).NotTo(HaveOccurred()) err = os.Setenv("NETWORK_RESOURCES_INJECTOR_IMAGE", "mock-image") diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 18eeb434c..4236db22e 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -58,6 +58,8 @@ spec: value: $SRIOV_INFINIBAND_CNI_IMAGE - name: OVS_CNI_IMAGE value: $OVS_CNI_IMAGE + - name: RDMA_CNI_IMAGE + value: $RDMA_CNI_IMAGE - name: SRIOV_DEVICE_PLUGIN_IMAGE value: $SRIOV_DEVICE_PLUGIN_IMAGE - name: NETWORK_RESOURCES_INJECTOR_IMAGE diff --git a/deployment/sriov-network-operator-chart/README.md b/deployment/sriov-network-operator-chart/README.md index 75f681ee9..cbc8dee3d 100644 --- 
a/deployment/sriov-network-operator-chart/README.md +++ b/deployment/sriov-network-operator-chart/README.md @@ -139,6 +139,7 @@ This section contains general parameters that apply to both the operator and dae | `images.sriovCni` | SR-IOV CNI image | | `images.ibSriovCni` | InfiniBand SR-IOV CNI image | | `images.ovsCni` | OVS CNI image | +| `images.rdmaCni` | RDMA CNI image | | `images.sriovDevicePlugin` | SR-IOV device plugin image | | `images.resourcesInjector` | Resources Injector image | | `images.webhook` | Operator Webhook image | diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 9eebf39cf..cd4e08192 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -58,6 +58,8 @@ spec: value: {{ .Values.images.ibSriovCni }} - name: OVS_CNI_IMAGE value: {{ .Values.images.ovsCni }} + - name: RDMA_CNI_IMAGE + value: {{ .Values.images.rdmaCni }} - name: SRIOV_DEVICE_PLUGIN_IMAGE value: {{ .Values.images.sriovDevicePlugin }} - name: NETWORK_RESOURCES_INJECTOR_IMAGE diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml index 8637f0bfb..73c9aeb9f 100644 --- a/deployment/sriov-network-operator-chart/values.yaml +++ b/deployment/sriov-network-operator-chart/values.yaml @@ -103,6 +103,7 @@ images: sriovCni: ghcr.io/k8snetworkplumbingwg/sriov-cni ibSriovCni: ghcr.io/k8snetworkplumbingwg/ib-sriov-cni ovsCni: ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin + rdmaCni: ghcr.io/k8snetworkplumbingwg/rdma-cni sriovDevicePlugin: ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin resourcesInjector: ghcr.io/k8snetworkplumbingwg/network-resources-injector webhook: ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook diff --git a/hack/env.sh b/hack/env.sh index c41c0aad2..28e0007e7 100755 --- a/hack/env.sh +++ b/hack/env.sh @@ 
-3,6 +3,8 @@ if [ -z $SKIP_VAR_SET ]; then export SRIOV_INFINIBAND_CNI_IMAGE=${SRIOV_INFINIBAND_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/ib-sriov-cni} # OVS_CNI_IMAGE can be explicitly set to empty value, use default only if the var is not set export OVS_CNI_IMAGE=${OVS_CNI_IMAGE-ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin} + # RDMA_CNI_IMAGE can be explicitly set to empty value, use default only if the var is not set + export RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE-ghcr.io/k8snetworkplumbingwg/rdma-cni} export SRIOV_DEVICE_PLUGIN_IMAGE=${SRIOV_DEVICE_PLUGIN_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin} export NETWORK_RESOURCES_INJECTOR_IMAGE=${NETWORK_RESOURCES_INJECTOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/network-resources-injector} export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE=${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-config-daemon} @@ -13,6 +15,8 @@ if [ -z $SKIP_VAR_SET ]; then else # ensure that OVS_CNI_IMAGE is set, empty string is a valid value OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-} + # ensure that RDMA_CNI_IMAGE is set, empty string is a valid value + RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-} METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-} [ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 [ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1 diff --git a/hack/release/chart-update.sh b/hack/release/chart-update.sh index 282521020..ec4618175 100755 --- a/hack/release/chart-update.sh +++ b/hack/release/chart-update.sh @@ -45,6 +45,7 @@ OPERATOR_TAG=${GITHUB_TAG} IB_SRIOV_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg ib-sriov-cni) SRIOV_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-cni) OVS_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg ovs-cni) +RDMA_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg rdma-cni) 
NETWORK_RESOURCE_INJECTOR_TAG=$(get_latest_github_tag k8snetworkplumbingwg network-resources-injector) SRIOV_DEVICE_PLUGIN_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-network-device-plugin) METRICS_EXPORTER_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-network-metrics-exporter) @@ -61,6 +62,7 @@ $YQ_CMD -i ".images.webhook = \"ghcr.io/${OPERATOR_REPO}/sriov-network-operator- $YQ_CMD -i ".images.sriovCni = \"ghcr.io/k8snetworkplumbingwg/sriov-cni:${SRIOV_CNI_TAG}\"" ${HELM_VALUES} $YQ_CMD -i ".images.ibSriovCni = \"ghcr.io/k8snetworkplumbingwg/ib-sriov-cni:${IB_SRIOV_CNI_TAG}\"" ${HELM_VALUES} $YQ_CMD -i ".images.ovsCni = \"ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin:${OVS_CNI_TAG}\"" ${HELM_VALUES} +$YQ_CMD -i ".images.rdmaCni = \"ghcr.io/k8snetworkplumbingwg/rdma-cni:${RDMA_CNI_TAG}\"" ${HELM_VALUES} $YQ_CMD -i ".images.sriovDevicePlugin = \"ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin:${SRIOV_DEVICE_PLUGIN_TAG}\"" ${HELM_VALUES} $YQ_CMD -i ".images.resourcesInjector = \"ghcr.io/k8snetworkplumbingwg/network-resources-injector:${NETWORK_RESOURCE_INJECTOR_TAG}\"" ${HELM_VALUES} $YQ_CMD -i ".images.metricsExporter = \"ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter:${METRICS_EXPORTER_TAG}\"" ${HELM_VALUES} diff --git a/hack/run-e2e-test.sh b/hack/run-e2e-test.sh index c7d4fea50..a5bb69c05 100755 --- a/hack/run-e2e-test.sh +++ b/hack/run-e2e-test.sh @@ -8,6 +8,7 @@ KUBECONFIG=${KUBECONFIG:-/root/env/ign/auth/kubeconfig} echo ${SRIOV_CNI_IMAGE} echo ${SRIOV_INFINIBAND_CNI_IMAGE} echo ${OVS_CNI_IMAGE} +echo ${RDMA_CNI_IMAGE} echo ${SRIOV_DEVICE_PLUGIN_IMAGE} echo ${NETWORK_RESOURCES_INJECTOR_IMAGE} echo ${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE} From 182fbfc01494cb90b884f6a98f08eb98767379d3 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 8 Jul 2024 14:42:08 +0200 Subject: [PATCH 3/9] e2e: Fix the podman login to OCP internal registry In OCP 4.16, dockercfg secret moved from the form of "serviceaccount:xyz" to ":xyz". 
Use bash left trim operator (`${#}`) to make it explicit. Signed-off-by: Andrea Panattoni --- hack/run-e2e-conformance-virtual-ocp.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index 6f5d7d72f..e1bed50ce 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -237,7 +237,11 @@ echo ${auth} > registry-login.conf internal_registry="image-registry.openshift-image-registry.svc:5000" pass=$( jq .\"image-registry.openshift-image-registry.svc:5000\".auth registry-login.conf ) pass=`echo ${pass:1:-1} | base64 -d` -podman login -u serviceaccount -p ${pass:15} $registry --tls-verify=false + +# dockercfg password is in the form `:password`. We need to trim the `:` prefix +pass=${pass#":"} + +podman login -u serviceaccount -p ${pass} $registry --tls-verify=false MAX_RETRIES=20 DELAY_SECONDS=10 From b6fe5b67fd728b591f546fb5969e48b369fcbc22 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 11 Jul 2024 21:48:29 +0300 Subject: [PATCH 4/9] implement RemovePfAppliedStatus function in store Signed-off-by: Sebastian Sch --- pkg/helper/mock/mock_helper.go | 14 ++++++++++++++ pkg/host/store/mock/mock_store.go | 14 ++++++++++++++ pkg/host/store/store.go | 12 ++++++++++++ 3 files changed, 40 insertions(+) diff --git a/pkg/helper/mock/mock_helper.go b/pkg/helper/mock/mock_helper.go index f19902825..5ef289ac4 100644 --- a/pkg/helper/mock/mock_helper.go +++ b/pkg/helper/mock/mock_helper.go @@ -942,6 +942,20 @@ func (mr *MockHostHelpersInterfaceMockRecorder) RemovePersistPFNameUdevRule(pfPc return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemovePersistPFNameUdevRule", reflect.TypeOf((*MockHostHelpersInterface)(nil).RemovePersistPFNameUdevRule), pfPciAddress) } +// RemovePfAppliedStatus mocks base method. 
+func (m *MockHostHelpersInterface) RemovePfAppliedStatus(pciAddress string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemovePfAppliedStatus", pciAddress) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemovePfAppliedStatus indicates an expected call of RemovePfAppliedStatus. +func (mr *MockHostHelpersInterfaceMockRecorder) RemovePfAppliedStatus(pciAddress interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemovePfAppliedStatus", reflect.TypeOf((*MockHostHelpersInterface)(nil).RemovePfAppliedStatus), pciAddress) +} + // RemoveVfRepresentorUdevRule mocks base method. func (m *MockHostHelpersInterface) RemoveVfRepresentorUdevRule(pfPciAddress string) error { m.ctrl.T.Helper() diff --git a/pkg/host/store/mock/mock_store.go b/pkg/host/store/mock/mock_store.go index 2e0071dfd..d405543a2 100644 --- a/pkg/host/store/mock/mock_store.go +++ b/pkg/host/store/mock/mock_store.go @@ -79,6 +79,20 @@ func (mr *MockManagerInterfaceMockRecorder) LoadPfsStatus(pciAddress interface{} return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadPfsStatus", reflect.TypeOf((*MockManagerInterface)(nil).LoadPfsStatus), pciAddress) } +// RemovePfAppliedStatus mocks base method. +func (m *MockManagerInterface) RemovePfAppliedStatus(pciAddress string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemovePfAppliedStatus", pciAddress) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemovePfAppliedStatus indicates an expected call of RemovePfAppliedStatus. +func (mr *MockManagerInterfaceMockRecorder) RemovePfAppliedStatus(pciAddress interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemovePfAppliedStatus", reflect.TypeOf((*MockManagerInterface)(nil).RemovePfAppliedStatus), pciAddress) +} + // SaveLastPfAppliedStatus mocks base method. 
func (m *MockManagerInterface) SaveLastPfAppliedStatus(PfInfo *v1.Interface) error { m.ctrl.T.Helper() diff --git a/pkg/host/store/store.go b/pkg/host/store/store.go index 77e4bbd32..67c0b17e3 100644 --- a/pkg/host/store/store.go +++ b/pkg/host/store/store.go @@ -20,6 +20,7 @@ import ( type ManagerInterface interface { ClearPCIAddressFolder() error SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) error + RemovePfAppliedStatus(pciAddress string) error LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) GetCheckPointNodeState() (*sriovnetworkv1.SriovNetworkNodeState, error) @@ -111,6 +112,17 @@ func (s *manager) SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) erro return err } +func (s *manager) RemovePfAppliedStatus(pciAddress string) error { + hostExtension := utils.GetHostExtension() + pathFile := filepath.Join(hostExtension, consts.PfAppliedConfig, pciAddress) + err := os.RemoveAll(pathFile) + if err != nil { + log.Log.Error(err, "failed to remove PF status", "pathFile", pathFile) + return err + } + return nil +} + // LoadPfsStatus convert the /etc/sriov-operator/pci/ json to pfstatus // returns false if the file doesn't exist. 
func (s *manager) LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) { From 62e33cdbabf04b589d921805699766cd03d856bc Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 11 Jul 2024 21:48:55 +0300 Subject: [PATCH 5/9] Remove store file on reset Signed-off-by: Sebastian Sch --- pkg/host/internal/sriov/sriov.go | 13 +++++++++++++ pkg/host/internal/sriov/sriov_test.go | 2 ++ 2 files changed, 15 insertions(+) diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index ee21b1539..5a110e2ff 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -817,6 +817,13 @@ func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt, log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address was externally created skipping the device reset", "pf-name", ifaceStatus.Name, "address", ifaceStatus.PciAddress) + + // remove pf status from host + err = storeManager.RemovePfAppliedStatus(ifaceStatus.PciAddress) + if err != nil { + return err + } + return nil } err = s.removeUdevRules(ifaceStatus.PciAddress) @@ -828,6 +835,12 @@ func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt, return err } + // remove pf status from host + err = storeManager.RemovePfAppliedStatus(ifaceStatus.PciAddress) + if err != nil { + return err + } + return nil } diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index cb647e64d..4a772bb52 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -452,6 +452,7 @@ var _ = Describe("SRIOV", func() { PciAddress: "0000:d8:00.0", NumVfs: 2, }, true, nil) + storeManagerMode.EXPECT().RemovePfAppliedStatus("0000:d8:00.0").Return(nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return( &netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil) @@ -479,6 +480,7 @@ var _ = 
Describe("SRIOV", func() { NumVfs: 2, ExternallyManaged: true, }, true, nil) + storeManagerMode.EXPECT().RemovePfAppliedStatus("0000:d8:00.0").Return(nil) Expect(s.ConfigSriovInterfaces(storeManagerMode, []sriovnetworkv1.Interface{}, []sriovnetworkv1.InterfaceExt{ From 556da0086ee2ee5e6bca51d701c4e773dc7814b4 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 11 Jul 2024 21:49:33 +0300 Subject: [PATCH 6/9] Skip firmware reset for devices we don't control Signed-off-by: Sebastian Sch --- pkg/plugins/mellanox/mellanox_plugin.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index 87363464b..353080e42 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -143,6 +143,11 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS processedNics[pciPrefix] = true pciAddress := pciPrefix + "0" + // Skip devices not configured by the operator + if p.nicNotConfiguredByOperator(portsMap) { + continue + } + // Skip externally managed NICs if p.nicHasExternallyManagedPFs(portsMap) { continue @@ -206,3 +211,19 @@ func (p *MellanoxPlugin) nicHasExternallyManagedPFs(nicPortsMap map[string]sriov } return false } + +// nicNotConfiguredByOperator returns true if none of the ports(interface) of the NIC has a PF status stored by the operator; a port whose status fails to load counts as not configured +func (p *MellanoxPlugin) nicNotConfiguredByOperator(nicPortsMap map[string]sriovnetworkv1.InterfaceExt) bool { + for _, iface := range nicPortsMap { + _, exist, err := p.helpers.LoadPfsStatus(iface.PciAddress) + if err != nil { + log.Log.Error(err, "failed to load PF status from disk", "address", iface.PciAddress) + continue + } + if exist { + log.Log.V(2).Info("PF configured by the operator", "interface", iface) + return false + } + } + return true +} From 411a215bede85e0f93bddba11fe5a7541afa37e7 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 11 Jul 2024 21:50:04
+0300 Subject: [PATCH 7/9] implement functional test for both regular and externally manage Signed-off-by: Sebastian Sch --- test/conformance/tests/test_sriov_operator.go | 50 ++++++++++++++++--- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index d72f6a57a..e1c246af5 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -1222,7 +1222,7 @@ var _ = Describe("[sriov] operator", func() { It("Should be possible to create a vfio-pci resource and allocate to a pod", func() { By("creating a vfio-pci node policy") resourceName := "testvfio" - _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, vfioNic.Name, vfioNode, 5, resourceName, "vfio-pci") + vfioPolicy, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, vfioNic.Name, vfioNode, 5, resourceName, "vfio-pci") Expect(err).ToNot(HaveOccurred()) By("waiting for the node state to be updated") @@ -1249,6 +1249,11 @@ var _ = Describe("[sriov] operator", func() { return allocatable }, 10*time.Minute, time.Second).Should(Equal(int64(5))) + By("validate the pf info exist on host") + output, _, err := runCommandOnConfigDaemon(vfioNode, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(output).ToNot(Equal("1")) + By("Creating sriov network to use the vfio device") sriovNetwork := &sriovv1.SriovNetwork{ ObjectMeta: metav1.ObjectMeta{ @@ -1278,6 +1283,25 @@ var _ = Describe("[sriov] operator", func() { networkStatusJSON, exist := firstPod.Annotations["k8s.v1.cni.cncf.io/network-status"] Expect(exist).To(BeTrue()) Expect(networkStatusJSON).To(ContainSubstring(fmt.Sprintf("\"mtu\": %d", vfioNic.Mtu))) + + By("deleting the policy") + err = clients.Delete(context.Background(), vfioPolicy, &runtimeclient.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + 
WaitForSRIOVStable() + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), vfioNode, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + return allocatable + }, 2*time.Minute, time.Second).Should(Equal(int64(0))) + + By("validate the pf info doesn't exist on the host anymore") + output, _, err = runCommandOnConfigDaemon(vfioNode, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(output).ToNot(Equal("0")) + }) }) @@ -2196,15 +2220,19 @@ var _ = Describe("[sriov] operator", func() { Context("ExternallyManaged Validation", func() { numVfs := 5 var node string - var nic *sriovv1.InterfaceExt + var nic sriovv1.InterfaceExt externallyManage := func(policy *sriovv1.SriovNetworkNodePolicy) { policy.Spec.ExternallyManaged = true } execute.BeforeAll(func() { - var err error - node, nic, err = sriovInfos.FindOneSriovNodeAndDevice() - Expect(err).ToNot(HaveOccurred()) + node, nic = sriovInfos.FindOneVfioSriovDevice() + }) + + BeforeEach(func() { + if node == "" { + Skip("not suitable device found for the test") + } By("Using device " + nic.Name + " on node " + node) }) @@ -2265,6 +2293,11 @@ var _ = Describe("[sriov] operator", func() { return allocatable }, 3*time.Minute, time.Second).Should(Equal(int64(numVfs))) + By("validate the pf info exist on host") + output, _, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(output).ToNot(Equal("1")) + By("deleting the policy") err = clients.Delete(context.Background(), sriovPolicy, &runtimeclient.DeleteOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -2279,11 +2312,16 @@ var _ = Describe("[sriov] operator", func() { }, 2*time.Minute, time.Second).Should(Equal(int64(0))) By("checking the virtual 
functions are still on the host") - output, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + output, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) Expect(err).ToNot(HaveOccurred()) Expect(errOutput).To(Equal("")) Expect(output).To(ContainSubstring("5")) + By("validate the pf info doesn't exist on the host anymore") + output, _, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l") + Expect(err).ToNot(HaveOccurred()) + Expect(output).ToNot(Equal("0")) + By("cleaning the manual sriov created") _, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 0 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) Expect(err).ToNot(HaveOccurred()) From ee7a04a318045f639d69bd249ed3fe1e4883735a Mon Sep 17 00:00:00 2001 From: Tobias Giese Date: Tue, 16 Jul 2024 10:39:28 +0200 Subject: [PATCH 8/9] ci: sent ci-triggers comment only once on PR open or reopen If we specify a type we can stop the Github Actions bot from resending the ci-triggers list to open PRs on every HEAD change. > [...] if no activity types are specified, the workflow runs when a pull request is opened or reopened or when the head branch of the pull request is updated. 
Signed-off-by: Tobias Giese --- .github/workflows/pr-ci-triggers.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr-ci-triggers.yml b/.github/workflows/pr-ci-triggers.yml index 6efc6d163..270b36380 100644 --- a/.github/workflows/pr-ci-triggers.yml +++ b/.github/workflows/pr-ci-triggers.yml @@ -1,5 +1,6 @@ on: pull_request_target: + types: [opened, reopened] jobs: vendors-ci-triggers-list: From f4bbcd28384ff5a9bb598e13e267ae4c730342c3 Mon Sep 17 00:00:00 2001 From: adrianc Date: Tue, 16 Jul 2024 14:16:04 +0300 Subject: [PATCH 9/9] Update pr-ci-triggers message Only maintainers can trigger vendor CI, mention that in the PR message. Signed-off-by: adrianc --- .github/workflows/pr-ci-triggers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr-ci-triggers.yml b/.github/workflows/pr-ci-triggers.yml index 270b36380..2c38f6fec 100644 --- a/.github/workflows/pr-ci-triggers.yml +++ b/.github/workflows/pr-ci-triggers.yml @@ -10,12 +10,12 @@ jobs: env: MESSAGE: >- Thanks for your PR,\n - To run vendors CIs use one of:\n + To run vendors CIs, Maintainers can use one of:\n * `/test-all`: To run all tests for all vendors.\n * `/test-e2e-all`: To run all E2E tests for all vendors.\n * `/test-e2e-nvidia-all`: To run all E2E tests for NVIDIA vendor.\n \n - To skip the vendors CIs use one of:\n + To skip the vendors CIs, Maintainers can use one of:\n * `/skip-all`: To skip all tests for all vendors.\n * `/skip-e2e-all`: To skip all E2E tests for all vendors.\n * `/skip-e2e-nvidia-all`: To skip all E2E tests for NVIDIA vendor.\n