Skip to content

Commit

Permalink
Merge pull request #13 from k8snetworkplumbingwg/master
Browse files Browse the repository at this point in the history
sync: from upstream master to master
  • Loading branch information
nvidia-ci-cd authored Jul 15, 2024
2 parents 53c92a3 + 64d69b6 commit 4782697
Show file tree
Hide file tree
Showing 18 changed files with 159 additions and 8 deletions.
17 changes: 17 additions & 0 deletions bindata/manifests/daemon/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,23 @@ spec:
mountPath: /host/etc/os-release
readOnly: true
{{- end }}
{{- if .RDMACNIImage }}
- name: rdma-cni
image: {{.RDMACNIImage}}
args: ["--no-sleep"]
securityContext:
privileged: true
resources:
requests:
cpu: 10m
memory: 10Mi
volumeMounts:
- name: cnibin
mountPath: /host/opt/cni/bin
- name: os-release
mountPath: /host/etc/os-release
readOnly: true
{{- end }}
{{- if .UsedSystemdMode}}
- name: sriov-service-copy
image: {{.Image}}
Expand Down
1 change: 1 addition & 0 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context,
data.Data["SRIOVCNIImage"] = os.Getenv("SRIOV_CNI_IMAGE")
data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE")
data.Data["OVSCNIImage"] = os.Getenv("OVS_CNI_IMAGE")
data.Data["RDMACNIImage"] = os.Getenv("RDMA_CNI_IMAGE")
data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION")
data.Data["ClusterType"] = vars.ClusterType
data.Data["DevMode"] = os.Getenv("DEV_MODE")
Expand Down
2 changes: 2 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("OVS_CNI_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("RDMA_CNI_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("SRIOV_DEVICE_PLUGIN_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("NETWORK_RESOURCES_INJECTOR_IMAGE", "mock-image")
Expand Down
2 changes: 2 additions & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ spec:
value: $SRIOV_INFINIBAND_CNI_IMAGE
- name: OVS_CNI_IMAGE
value: $OVS_CNI_IMAGE
- name: RDMA_CNI_IMAGE
value: $RDMA_CNI_IMAGE
- name: SRIOV_DEVICE_PLUGIN_IMAGE
value: $SRIOV_DEVICE_PLUGIN_IMAGE
- name: NETWORK_RESOURCES_INJECTOR_IMAGE
Expand Down
1 change: 1 addition & 0 deletions deployment/sriov-network-operator-chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ This section contains general parameters that apply to both the operator and dae
| `images.sriovCni` | SR-IOV CNI image |
| `images.ibSriovCni` | InfiniBand SR-IOV CNI image |
| `images.ovsCni` | OVS CNI image |
| `images.rdmaCni` | RDMA CNI image |
| `images.sriovDevicePlugin` | SR-IOV device plugin image |
| `images.resourcesInjector` | Resources Injector image |
| `images.webhook` | Operator Webhook image |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ spec:
value: {{ .Values.images.ibSriovCni }}
- name: OVS_CNI_IMAGE
value: {{ .Values.images.ovsCni }}
- name: RDMA_CNI_IMAGE
value: {{ .Values.images.rdmaCni }}
- name: SRIOV_DEVICE_PLUGIN_IMAGE
value: {{ .Values.images.sriovDevicePlugin }}
- name: NETWORK_RESOURCES_INJECTOR_IMAGE
Expand Down
1 change: 1 addition & 0 deletions deployment/sriov-network-operator-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ images:
sriovCni: ghcr.io/k8snetworkplumbingwg/sriov-cni
ibSriovCni: ghcr.io/k8snetworkplumbingwg/ib-sriov-cni
ovsCni: ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin
rdmaCni: ghcr.io/k8snetworkplumbingwg/rdma-cni
sriovDevicePlugin: ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin
resourcesInjector: ghcr.io/k8snetworkplumbingwg/network-resources-injector
webhook: ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook
Expand Down
4 changes: 4 additions & 0 deletions hack/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ if [ -z $SKIP_VAR_SET ]; then
export SRIOV_INFINIBAND_CNI_IMAGE=${SRIOV_INFINIBAND_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/ib-sriov-cni}
# OVS_CNI_IMAGE can be explicitly set to empty value, use default only if the var is not set
export OVS_CNI_IMAGE=${OVS_CNI_IMAGE-ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin}
# RDMA_CNI_IMAGE can be explicitly set to empty value, use default only if the var is not set
export RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE-ghcr.io/k8snetworkplumbingwg/rdma-cni}
export SRIOV_DEVICE_PLUGIN_IMAGE=${SRIOV_DEVICE_PLUGIN_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin}
export NETWORK_RESOURCES_INJECTOR_IMAGE=${NETWORK_RESOURCES_INJECTOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/network-resources-injector}
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE=${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-config-daemon}
Expand All @@ -13,6 +15,8 @@ if [ -z $SKIP_VAR_SET ]; then
else
# ensure that OVS_CNI_IMAGE is set, empty string is a valid value
OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-}
# ensure that RDMA_CNI_IMAGE is set, empty string is a valid value
# (plain ${VAR:-} expansion; a nested "$" inside the braces is a bad substitution in bash)
RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-}
METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-}
[ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
[ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
Expand Down
2 changes: 2 additions & 0 deletions hack/release/chart-update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ OPERATOR_TAG=${GITHUB_TAG}
IB_SRIOV_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg ib-sriov-cni)
SRIOV_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-cni)
OVS_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg ovs-cni)
RDMA_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg rdma-cni)
NETWORK_RESOURCE_INJECTOR_TAG=$(get_latest_github_tag k8snetworkplumbingwg network-resources-injector)
SRIOV_DEVICE_PLUGIN_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-network-device-plugin)
METRICS_EXPORTER_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-network-metrics-exporter)
Expand All @@ -61,6 +62,7 @@ $YQ_CMD -i ".images.webhook = \"ghcr.io/${OPERATOR_REPO}/sriov-network-operator-
$YQ_CMD -i ".images.sriovCni = \"ghcr.io/k8snetworkplumbingwg/sriov-cni:${SRIOV_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.ibSriovCni = \"ghcr.io/k8snetworkplumbingwg/ib-sriov-cni:${IB_SRIOV_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.ovsCni = \"ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin:${OVS_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.rdmaCni = \"ghcr.io/k8snetworkplumbingwg/rdma-cni:${RDMA_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.sriovDevicePlugin = \"ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin:${SRIOV_DEVICE_PLUGIN_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.resourcesInjector = \"ghcr.io/k8snetworkplumbingwg/network-resources-injector:${NETWORK_RESOURCE_INJECTOR_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.metricsExporter = \"ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter:${METRICS_EXPORTER_TAG}\"" ${HELM_VALUES}
Expand Down
8 changes: 6 additions & 2 deletions hack/run-e2e-conformance-virtual-ocp.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -xeo pipefail

OCP_VERSION=${OCP_VERSION:-4.16.0-rc.2}
OCP_VERSION=${OCP_VERSION:-4.16.0}
cluster_name=${CLUSTER_NAME:-ocp-virt}
domain_name=lab

Expand Down Expand Up @@ -237,7 +237,11 @@ echo ${auth} > registry-login.conf
internal_registry="image-registry.openshift-image-registry.svc:5000"
pass=$( jq .\"image-registry.openshift-image-registry.svc:5000\".auth registry-login.conf )
pass=`echo ${pass:1:-1} | base64 -d`
podman login -u serviceaccount -p ${pass:15} $registry --tls-verify=false

# dockercfg password is in the form `<token>:password`. We need to trim the `<token>:` prefix
pass=${pass#"<token>:"}

podman login -u serviceaccount -p ${pass} $registry --tls-verify=false

MAX_RETRIES=20
DELAY_SECONDS=10
Expand Down
1 change: 1 addition & 0 deletions hack/run-e2e-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ KUBECONFIG=${KUBECONFIG:-/root/env/ign/auth/kubeconfig}
echo ${SRIOV_CNI_IMAGE}
echo ${SRIOV_INFINIBAND_CNI_IMAGE}
echo ${OVS_CNI_IMAGE}
echo ${RDMA_CNI_IMAGE}
echo ${SRIOV_DEVICE_PLUGIN_IMAGE}
echo ${NETWORK_RESOURCES_INJECTOR_IMAGE}
echo ${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}
Expand Down
14 changes: 14 additions & 0 deletions pkg/helper/mock/mock_helper.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions pkg/host/internal/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,13 @@ func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt,
log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address was externally created skipping the device reset",
"pf-name", ifaceStatus.Name,
"address", ifaceStatus.PciAddress)

// remove pf status from host
err = storeManager.RemovePfAppliedStatus(ifaceStatus.PciAddress)
if err != nil {
return err
}

return nil
}
err = s.removeUdevRules(ifaceStatus.PciAddress)
Expand All @@ -828,6 +835,12 @@ func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt,
return err
}

// remove pf status from host
err = storeManager.RemovePfAppliedStatus(ifaceStatus.PciAddress)
if err != nil {
return err
}

return nil
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/host/internal/sriov/sriov_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ var _ = Describe("SRIOV", func() {
PciAddress: "0000:d8:00.0",
NumVfs: 2,
}, true, nil)
storeManagerMode.EXPECT().RemovePfAppliedStatus("0000:d8:00.0").Return(nil)
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(
&netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}},
nil)
Expand Down Expand Up @@ -479,6 +480,7 @@ var _ = Describe("SRIOV", func() {
NumVfs: 2,
ExternallyManaged: true,
}, true, nil)
storeManagerMode.EXPECT().RemovePfAppliedStatus("0000:d8:00.0").Return(nil)
Expect(s.ConfigSriovInterfaces(storeManagerMode,
[]sriovnetworkv1.Interface{},
[]sriovnetworkv1.InterfaceExt{
Expand Down
14 changes: 14 additions & 0 deletions pkg/host/store/mock/mock_store.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions pkg/host/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
type ManagerInterface interface {
ClearPCIAddressFolder() error
SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) error
RemovePfAppliedStatus(pciAddress string) error
LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error)

GetCheckPointNodeState() (*sriovnetworkv1.SriovNetworkNodeState, error)
Expand Down Expand Up @@ -111,6 +112,17 @@ func (s *manager) SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) erro
return err
}

// RemovePfAppliedStatus deletes the stored applied-status entry of the PF
// identified by pciAddress. The entry path is built from the host extension,
// consts.PfAppliedConfig and the PCI address. Any removal error is logged and
// returned to the caller; nil is returned on success.
func (s *manager) RemovePfAppliedStatus(pciAddress string) error {
	pathFile := filepath.Join(utils.GetHostExtension(), consts.PfAppliedConfig, pciAddress)
	if err := os.RemoveAll(pathFile); err != nil {
		log.Log.Error(err, "failed to remove PF status", "pathFile", pathFile)
		return err
	}
	return nil
}

// LoadPfsStatus convert the /etc/sriov-operator/pci/<pci-address> json to pfstatus
// returns false if the file doesn't exist.
func (s *manager) LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) {
Expand Down
21 changes: 21 additions & 0 deletions pkg/plugins/mellanox/mellanox_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS
processedNics[pciPrefix] = true
pciAddress := pciPrefix + "0"

// Skip devices not configured by the operator
if p.nicNotConfiguredByOperator(portsMap) {
continue
}

// Skip externally managed NICs
if p.nicHasExternallyManagedPFs(portsMap) {
continue
Expand Down Expand Up @@ -206,3 +211,19 @@ func (p *MellanoxPlugin) nicHasExternallyManagedPFs(nicPortsMap map[string]sriov
}
return false
}

// nicNotConfiguredByOperator returns true if none of the ports (interfaces) of the NIC
// has a PF status stored by the operator, i.e. the NIC is not configured by the operator.
// It returns false as soon as a stored PF status is found for any port.
// A port whose status cannot be loaded is logged and treated as not configured.
func (p *MellanoxPlugin) nicNotConfiguredByOperator(nicPortsMap map[string]sriovnetworkv1.InterfaceExt) bool {
	for _, iface := range nicPortsMap {
		_, exist, err := p.helpers.LoadPfsStatus(iface.PciAddress)
		if err != nil {
			log.Log.Error(err, "failed to load PF status from disk", "address", iface.PciAddress)
			continue
		}
		if exist {
			// A stored status means the operator configured this PF, so the NIC
			// must not be reported as "not configured".
			log.Log.V(2).Info("PF configured by the operator", "interface", iface)
			return false
		}
	}
	return true
}
50 changes: 44 additions & 6 deletions test/conformance/tests/test_sriov_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,7 @@ var _ = Describe("[sriov] operator", func() {
It("Should be possible to create a vfio-pci resource and allocate to a pod", func() {
By("creating a vfio-pci node policy")
resourceName := "testvfio"
_, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, vfioNic.Name, vfioNode, 5, resourceName, "vfio-pci")
vfioPolicy, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, vfioNic.Name, vfioNode, 5, resourceName, "vfio-pci")
Expect(err).ToNot(HaveOccurred())

By("waiting for the node state to be updated")
Expand All @@ -1249,6 +1249,11 @@ var _ = Describe("[sriov] operator", func() {
return allocatable
}, 10*time.Minute, time.Second).Should(Equal(int64(5)))

By("validate the pf info exist on host")
output, _, err := runCommandOnConfigDaemon(vfioNode, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("1"))

By("Creating sriov network to use the vfio device")
sriovNetwork := &sriovv1.SriovNetwork{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -1278,6 +1283,25 @@ var _ = Describe("[sriov] operator", func() {
networkStatusJSON, exist := firstPod.Annotations["k8s.v1.cni.cncf.io/network-status"]
Expect(exist).To(BeTrue())
Expect(networkStatusJSON).To(ContainSubstring(fmt.Sprintf("\"mtu\": %d", vfioNic.Mtu)))

By("deleting the policy")
err = clients.Delete(context.Background(), vfioPolicy, &runtimeclient.DeleteOptions{})
Expect(err).ToNot(HaveOccurred())
WaitForSRIOVStable()

Eventually(func() int64 {
testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), vfioNode, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)]
allocatable, _ := resNum.AsInt64()
return allocatable
}, 2*time.Minute, time.Second).Should(Equal(int64(0)))

By("validate the pf info doesn't exist on the host anymore")
output, _, err = runCommandOnConfigDaemon(vfioNode, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("0"))

})
})

Expand Down Expand Up @@ -2196,15 +2220,19 @@ var _ = Describe("[sriov] operator", func() {
Context("ExternallyManaged Validation", func() {
numVfs := 5
var node string
var nic *sriovv1.InterfaceExt
var nic sriovv1.InterfaceExt
externallyManage := func(policy *sriovv1.SriovNetworkNodePolicy) {
policy.Spec.ExternallyManaged = true
}

execute.BeforeAll(func() {
var err error
node, nic, err = sriovInfos.FindOneSriovNodeAndDevice()
Expect(err).ToNot(HaveOccurred())
node, nic = sriovInfos.FindOneVfioSriovDevice()
})

BeforeEach(func() {
if node == "" {
Skip("not suitable device found for the test")
}

By("Using device " + nic.Name + " on node " + node)
})
Expand Down Expand Up @@ -2265,6 +2293,11 @@ var _ = Describe("[sriov] operator", func() {
return allocatable
}, 3*time.Minute, time.Second).Should(Equal(int64(numVfs)))

By("validate the pf info exist on host")
output, _, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("1"))

By("deleting the policy")
err = clients.Delete(context.Background(), sriovPolicy, &runtimeclient.DeleteOptions{})
Expect(err).ToNot(HaveOccurred())
Expand All @@ -2279,11 +2312,16 @@ var _ = Describe("[sriov] operator", func() {
}, 2*time.Minute, time.Second).Should(Equal(int64(0)))

By("checking the virtual functions are still on the host")
output, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name))
output, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name))
Expect(err).ToNot(HaveOccurred())
Expect(errOutput).To(Equal(""))
Expect(output).To(ContainSubstring("5"))

By("validate the pf info doesn't exist on the host anymore")
output, _, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("0"))

By("cleaning the manual sriov created")
_, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 0 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name))
Expect(err).ToNot(HaveOccurred())
Expand Down

0 comments on commit 4782697

Please sign in to comment.