Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync: from upstream master to master #13

Merged
merged 10 commits into from
Jul 15, 2024
17 changes: 17 additions & 0 deletions bindata/manifests/daemon/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,23 @@ spec:
mountPath: /host/etc/os-release
readOnly: true
{{- end }}
{{- if .RDMACNIImage }}
- name: rdma-cni
image: {{.RDMACNIImage}}
args: ["--no-sleep"]
securityContext:
privileged: true
resources:
requests:
cpu: 10m
memory: 10Mi
volumeMounts:
- name: cnibin
mountPath: /host/opt/cni/bin
- name: os-release
mountPath: /host/etc/os-release
readOnly: true
{{- end }}
{{- if .UsedSystemdMode}}
- name: sriov-service-copy
image: {{.Image}}
Expand Down
1 change: 1 addition & 0 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context,
data.Data["SRIOVCNIImage"] = os.Getenv("SRIOV_CNI_IMAGE")
data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE")
data.Data["OVSCNIImage"] = os.Getenv("OVS_CNI_IMAGE")
data.Data["RDMACNIImage"] = os.Getenv("RDMA_CNI_IMAGE")
data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION")
data.Data["ClusterType"] = vars.ClusterType
data.Data["DevMode"] = os.Getenv("DEV_MODE")
Expand Down
2 changes: 2 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("OVS_CNI_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("RDMA_CNI_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("SRIOV_DEVICE_PLUGIN_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("NETWORK_RESOURCES_INJECTOR_IMAGE", "mock-image")
Expand Down
2 changes: 2 additions & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ spec:
value: $SRIOV_INFINIBAND_CNI_IMAGE
- name: OVS_CNI_IMAGE
value: $OVS_CNI_IMAGE
- name: RDMA_CNI_IMAGE
value: $RDMA_CNI_IMAGE
- name: SRIOV_DEVICE_PLUGIN_IMAGE
value: $SRIOV_DEVICE_PLUGIN_IMAGE
- name: NETWORK_RESOURCES_INJECTOR_IMAGE
Expand Down
1 change: 1 addition & 0 deletions deployment/sriov-network-operator-chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ This section contains general parameters that apply to both the operator and dae
| `images.sriovCni` | SR-IOV CNI image |
| `images.ibSriovCni` | InfiniBand SR-IOV CNI image |
| `images.ovsCni` | OVS CNI image |
| `images.rdmaCni` | RDMA CNI image |
| `images.sriovDevicePlugin` | SR-IOV device plugin image |
| `images.resourcesInjector` | Resources Injector image |
| `images.webhook` | Operator Webhook image |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ spec:
value: {{ .Values.images.ibSriovCni }}
- name: OVS_CNI_IMAGE
value: {{ .Values.images.ovsCni }}
- name: RDMA_CNI_IMAGE
value: {{ .Values.images.rdmaCni }}
- name: SRIOV_DEVICE_PLUGIN_IMAGE
value: {{ .Values.images.sriovDevicePlugin }}
- name: NETWORK_RESOURCES_INJECTOR_IMAGE
Expand Down
1 change: 1 addition & 0 deletions deployment/sriov-network-operator-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ images:
sriovCni: ghcr.io/k8snetworkplumbingwg/sriov-cni
ibSriovCni: ghcr.io/k8snetworkplumbingwg/ib-sriov-cni
ovsCni: ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin
rdmaCni: ghcr.io/k8snetworkplumbingwg/rdma-cni
sriovDevicePlugin: ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin
resourcesInjector: ghcr.io/k8snetworkplumbingwg/network-resources-injector
webhook: ghcr.io/k8snetworkplumbingwg/sriov-network-operator-webhook
Expand Down
4 changes: 4 additions & 0 deletions hack/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ if [ -z $SKIP_VAR_SET ]; then
export SRIOV_INFINIBAND_CNI_IMAGE=${SRIOV_INFINIBAND_CNI_IMAGE:-ghcr.io/k8snetworkplumbingwg/ib-sriov-cni}
# OVS_CNI_IMAGE can be explicitly set to empty value, use default only if the var is not set
export OVS_CNI_IMAGE=${OVS_CNI_IMAGE-ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin}
# RDMA_CNI_IMAGE can be explicitly set to empty value, use default only if the var is not set
export RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE-ghcr.io/k8snetworkplumbingwg/rdma-cni}
export SRIOV_DEVICE_PLUGIN_IMAGE=${SRIOV_DEVICE_PLUGIN_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin}
export NETWORK_RESOURCES_INJECTOR_IMAGE=${NETWORK_RESOURCES_INJECTOR_IMAGE:-ghcr.io/k8snetworkplumbingwg/network-resources-injector}
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE=${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-ghcr.io/k8snetworkplumbingwg/sriov-network-operator-config-daemon}
Expand All @@ -13,6 +15,8 @@ if [ -z $SKIP_VAR_SET ]; then
else
# ensure that OVS_CNI_IMAGE is set, empty string is a valid value
OVS_CNI_IMAGE=${OVS_CNI_IMAGE:-}
# ensure that RDMA_CNI_IMAGE is set, empty string is a valid value
# (parameter name must not carry a leading `$` inside `${...}`, otherwise bash fails with "bad substitution")
RDMA_CNI_IMAGE=${RDMA_CNI_IMAGE:-}
METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE=${METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE:-}
[ -z $SRIOV_CNI_IMAGE ] && echo "SRIOV_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
[ -z $SRIOV_INFINIBAND_CNI_IMAGE ] && echo "SRIOV_INFINIBAND_CNI_IMAGE is empty but SKIP_VAR_SET is set" && exit 1
Expand Down
2 changes: 2 additions & 0 deletions hack/release/chart-update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ OPERATOR_TAG=${GITHUB_TAG}
IB_SRIOV_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg ib-sriov-cni)
SRIOV_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-cni)
OVS_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg ovs-cni)
RDMA_CNI_TAG=$(get_latest_github_tag k8snetworkplumbingwg rdma-cni)
NETWORK_RESOURCE_INJECTOR_TAG=$(get_latest_github_tag k8snetworkplumbingwg network-resources-injector)
SRIOV_DEVICE_PLUGIN_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-network-device-plugin)
METRICS_EXPORTER_TAG=$(get_latest_github_tag k8snetworkplumbingwg sriov-network-metrics-exporter)
Expand All @@ -61,6 +62,7 @@ $YQ_CMD -i ".images.webhook = \"ghcr.io/${OPERATOR_REPO}/sriov-network-operator-
$YQ_CMD -i ".images.sriovCni = \"ghcr.io/k8snetworkplumbingwg/sriov-cni:${SRIOV_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.ibSriovCni = \"ghcr.io/k8snetworkplumbingwg/ib-sriov-cni:${IB_SRIOV_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.ovsCni = \"ghcr.io/k8snetworkplumbingwg/ovs-cni-plugin:${OVS_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.rdmaCni = \"ghcr.io/k8snetworkplumbingwg/rdma-cni:${RDMA_CNI_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.sriovDevicePlugin = \"ghcr.io/k8snetworkplumbingwg/sriov-network-device-plugin:${SRIOV_DEVICE_PLUGIN_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.resourcesInjector = \"ghcr.io/k8snetworkplumbingwg/network-resources-injector:${NETWORK_RESOURCE_INJECTOR_TAG}\"" ${HELM_VALUES}
$YQ_CMD -i ".images.metricsExporter = \"ghcr.io/k8snetworkplumbingwg/sriov-network-metrics-exporter:${METRICS_EXPORTER_TAG}\"" ${HELM_VALUES}
Expand Down
8 changes: 6 additions & 2 deletions hack/run-e2e-conformance-virtual-ocp.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -xeo pipefail

OCP_VERSION=${OCP_VERSION:-4.16.0-rc.2}
OCP_VERSION=${OCP_VERSION:-4.16.0}
cluster_name=${CLUSTER_NAME:-ocp-virt}
domain_name=lab

Expand Down Expand Up @@ -237,7 +237,11 @@ echo ${auth} > registry-login.conf
internal_registry="image-registry.openshift-image-registry.svc:5000"
pass=$( jq .\"image-registry.openshift-image-registry.svc:5000\".auth registry-login.conf )
pass=`echo ${pass:1:-1} | base64 -d`
podman login -u serviceaccount -p ${pass:15} $registry --tls-verify=false

# dockercfg password is in the form `<token>:password`. We need to trim the `<token>:` prefix
pass=${pass#"<token>:"}

podman login -u serviceaccount -p ${pass} $registry --tls-verify=false

MAX_RETRIES=20
DELAY_SECONDS=10
Expand Down
1 change: 1 addition & 0 deletions hack/run-e2e-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ KUBECONFIG=${KUBECONFIG:-/root/env/ign/auth/kubeconfig}
echo ${SRIOV_CNI_IMAGE}
echo ${SRIOV_INFINIBAND_CNI_IMAGE}
echo ${OVS_CNI_IMAGE}
echo ${RDMA_CNI_IMAGE}
echo ${SRIOV_DEVICE_PLUGIN_IMAGE}
echo ${NETWORK_RESOURCES_INJECTOR_IMAGE}
echo ${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}
Expand Down
14 changes: 14 additions & 0 deletions pkg/helper/mock/mock_helper.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions pkg/host/internal/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,13 @@ func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt,
log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address was externally created skipping the device reset",
"pf-name", ifaceStatus.Name,
"address", ifaceStatus.PciAddress)

// remove pf status from host
err = storeManager.RemovePfAppliedStatus(ifaceStatus.PciAddress)
if err != nil {
return err
}

return nil
}
err = s.removeUdevRules(ifaceStatus.PciAddress)
Expand All @@ -828,6 +835,12 @@ func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt,
return err
}

// remove pf status from host
err = storeManager.RemovePfAppliedStatus(ifaceStatus.PciAddress)
if err != nil {
return err
}

return nil
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/host/internal/sriov/sriov_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ var _ = Describe("SRIOV", func() {
PciAddress: "0000:d8:00.0",
NumVfs: 2,
}, true, nil)
storeManagerMode.EXPECT().RemovePfAppliedStatus("0000:d8:00.0").Return(nil)
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(
&netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}},
nil)
Expand Down Expand Up @@ -479,6 +480,7 @@ var _ = Describe("SRIOV", func() {
NumVfs: 2,
ExternallyManaged: true,
}, true, nil)
storeManagerMode.EXPECT().RemovePfAppliedStatus("0000:d8:00.0").Return(nil)
Expect(s.ConfigSriovInterfaces(storeManagerMode,
[]sriovnetworkv1.Interface{},
[]sriovnetworkv1.InterfaceExt{
Expand Down
14 changes: 14 additions & 0 deletions pkg/host/store/mock/mock_store.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions pkg/host/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
type ManagerInterface interface {
ClearPCIAddressFolder() error
SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) error
RemovePfAppliedStatus(pciAddress string) error
LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error)

GetCheckPointNodeState() (*sriovnetworkv1.SriovNetworkNodeState, error)
Expand Down Expand Up @@ -111,6 +112,17 @@ func (s *manager) SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) erro
return err
}

// RemovePfAppliedStatus deletes the stored PF status entry for the given PCI address.
// The entry is located at <host extension>/<consts.PfAppliedConfig>/<pciAddress>.
// Removal is done with os.RemoveAll, so an entry that does not exist is not an error.
// Any removal failure is logged and returned to the caller.
func (s *manager) RemovePfAppliedStatus(pciAddress string) error {
	pfStatusPath := filepath.Join(utils.GetHostExtension(), consts.PfAppliedConfig, pciAddress)
	if err := os.RemoveAll(pfStatusPath); err != nil {
		log.Log.Error(err, "failed to remove PF status", "pathFile", pfStatusPath)
		return err
	}
	return nil
}

// LoadPfsStatus convert the /etc/sriov-operator/pci/<pci-address> json to pfstatus
// returns false if the file doesn't exist.
func (s *manager) LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) {
Expand Down
21 changes: 21 additions & 0 deletions pkg/plugins/mellanox/mellanox_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS
processedNics[pciPrefix] = true
pciAddress := pciPrefix + "0"

// Skip devices not configured by the operator
if p.nicNotConfiguredByOperator(portsMap) {
continue
}

// Skip externally managed NICs
if p.nicHasExternallyManagedPFs(portsMap) {
continue
Expand Down Expand Up @@ -206,3 +211,19 @@ func (p *MellanoxPlugin) nicHasExternallyManagedPFs(nicPortsMap map[string]sriov
}
return false
}

// nicNotConfiguredByOperator reports whether the NIC is NOT configured by the operator.
// It returns true only when none of the NIC's ports (interfaces) has a PF status
// that can be loaded via LoadPfsStatus; as soon as one port has a stored status the
// NIC is considered operator-configured and false is returned.
// A port whose status fails to load is logged and treated as not configured.
func (p *MellanoxPlugin) nicNotConfiguredByOperator(nicPortsMap map[string]sriovnetworkv1.InterfaceExt) bool {
	for _, iface := range nicPortsMap {
		_, exist, err := p.helpers.LoadPfsStatus(iface.PciAddress)
		if err != nil {
			log.Log.Error(err, "failed to load PF status from disk", "address", iface.PciAddress)
			continue
		}
		if exist {
			// A stored PF status means the operator configured this port,
			// so the NIC must not be reported as "not configured".
			log.Log.V(2).Info("PF configured by the operator", "interface", iface)
			return false
		}
	}
	return true
}
50 changes: 44 additions & 6 deletions test/conformance/tests/test_sriov_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,7 @@ var _ = Describe("[sriov] operator", func() {
It("Should be possible to create a vfio-pci resource and allocate to a pod", func() {
By("creating a vfio-pci node policy")
resourceName := "testvfio"
_, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, vfioNic.Name, vfioNode, 5, resourceName, "vfio-pci")
vfioPolicy, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, vfioNic.Name, vfioNode, 5, resourceName, "vfio-pci")
Expect(err).ToNot(HaveOccurred())

By("waiting for the node state to be updated")
Expand All @@ -1249,6 +1249,11 @@ var _ = Describe("[sriov] operator", func() {
return allocatable
}, 10*time.Minute, time.Second).Should(Equal(int64(5)))

By("validate the pf info exist on host")
output, _, err := runCommandOnConfigDaemon(vfioNode, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("1"))

By("Creating sriov network to use the vfio device")
sriovNetwork := &sriovv1.SriovNetwork{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -1278,6 +1283,25 @@ var _ = Describe("[sriov] operator", func() {
networkStatusJSON, exist := firstPod.Annotations["k8s.v1.cni.cncf.io/network-status"]
Expect(exist).To(BeTrue())
Expect(networkStatusJSON).To(ContainSubstring(fmt.Sprintf("\"mtu\": %d", vfioNic.Mtu)))

By("deleting the policy")
err = clients.Delete(context.Background(), vfioPolicy, &runtimeclient.DeleteOptions{})
Expect(err).ToNot(HaveOccurred())
WaitForSRIOVStable()

Eventually(func() int64 {
testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), vfioNode, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)]
allocatable, _ := resNum.AsInt64()
return allocatable
}, 2*time.Minute, time.Second).Should(Equal(int64(0)))

By("validate the pf info doesn't exist on the host anymore")
output, _, err = runCommandOnConfigDaemon(vfioNode, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("0"))

})
})

Expand Down Expand Up @@ -2196,15 +2220,19 @@ var _ = Describe("[sriov] operator", func() {
Context("ExternallyManaged Validation", func() {
numVfs := 5
var node string
var nic *sriovv1.InterfaceExt
var nic sriovv1.InterfaceExt
externallyManage := func(policy *sriovv1.SriovNetworkNodePolicy) {
policy.Spec.ExternallyManaged = true
}

execute.BeforeAll(func() {
var err error
node, nic, err = sriovInfos.FindOneSriovNodeAndDevice()
Expect(err).ToNot(HaveOccurred())
node, nic = sriovInfos.FindOneVfioSriovDevice()
})

BeforeEach(func() {
if node == "" {
Skip("not suitable device found for the test")
}

By("Using device " + nic.Name + " on node " + node)
})
Expand Down Expand Up @@ -2265,6 +2293,11 @@ var _ = Describe("[sriov] operator", func() {
return allocatable
}, 3*time.Minute, time.Second).Should(Equal(int64(numVfs)))

By("validate the pf info exist on host")
output, _, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("1"))

By("deleting the policy")
err = clients.Delete(context.Background(), sriovPolicy, &runtimeclient.DeleteOptions{})
Expect(err).ToNot(HaveOccurred())
Expand All @@ -2279,11 +2312,16 @@ var _ = Describe("[sriov] operator", func() {
}, 2*time.Minute, time.Second).Should(Equal(int64(0)))

By("checking the virtual functions are still on the host")
output, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name))
output, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name))
Expect(err).ToNot(HaveOccurred())
Expect(errOutput).To(Equal(""))
Expect(output).To(ContainSubstring("5"))

By("validate the pf info doesn't exist on the host anymore")
output, _, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", "ls /host/etc/sriov-operator/pci/ | wc -l")
Expect(err).ToNot(HaveOccurred())
Expect(output).ToNot(Equal("0"))

By("cleaning the manual sriov created")
_, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 0 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name))
Expect(err).ToNot(HaveOccurred())
Expand Down
Loading