diff --git a/test/e2e/common.go b/test/e2e/common.go
index 9039c6391a..cfdbbd06d5 100644
--- a/test/e2e/common.go
+++ b/test/e2e/common.go
@@ -32,6 +32,11 @@ import (
 	"sigs.k8s.io/cluster-api/util"
 )
 
+const (
+	CCMPath      = "CCM"
+	CCMResources = "CCM_RESOURCES"
+)
+
 func Byf(format string, a ...interface{}) {
 	By(fmt.Sprintf(format, a...))
 }
diff --git a/test/e2e/config/gcp-ci.yaml b/test/e2e/config/gcp-ci.yaml
index 2b50f8cce0..4a806ac69f 100644
--- a/test/e2e/config/gcp-ci.yaml
+++ b/test/e2e/config/gcp-ci.yaml
@@ -87,6 +87,8 @@ variables:
   CLUSTER_TOPOLOGY: "true"
   # Cluster Addons
   CNI: "${PWD}/test/e2e/data/cni/calico/calico.yaml"
+  CCM: "${PWD}/test/e2e/data/ccm/gce-cloud-controller-manager.yaml"
+
   GCP_CONTROL_PLANE_MACHINE_TYPE: n1-standard-2
   GCP_NODE_MACHINE_TYPE: n1-standard-2
   CONFORMANCE_WORKER_MACHINE_COUNT: "2"
diff --git a/test/e2e/data/ccm/gce-cloud-controller-manager.yaml b/test/e2e/data/ccm/gce-cloud-controller-manager.yaml
new file mode 100644
index 0000000000..9162543b89
--- /dev/null
+++ b/test/e2e/data/ccm/gce-cloud-controller-manager.yaml
@@ -0,0 +1,347 @@
+# GCP CCM DaemonSet
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: cloud-controller-manager
+  namespace: kube-system
+spec:
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: gce-cloud-controller-manager
+  template:
+    metadata:
+      labels:
+        app: gce-cloud-controller-manager
+    spec:
+      dnsPolicy: Default
+      hostNetwork: true
+      priorityClassName: system-cluster-critical
+      serviceAccountName: cloud-controller-manager
+      nodeSelector:
+        node-role.kubernetes.io/control-plane: ""
+      # affinity:
+      #   nodeAffinity:
+      #     requiredDuringSchedulingIgnoredDuringExecution:
+      #       nodeSelectorTerms:
+      #       - matchExpressions:
+      #         - key: node-role.kubernetes.io/control-plane
+      #           operator: Exists
+      #       - matchExpressions:
+      #         - key: node-role.kubernetes.io/master
+      #           operator: Exists
+      tolerations:
+      # this taint is set by all kubelets running `--cloud-provider=external`
+      # so we should tolerate it to schedule the gce ccm
+      - key: "node.cloudprovider.kubernetes.io/uninitialized"
+        value: "true"
+        effect: "NoSchedule"
+      - key: "CriticalAddonsOnly"
+        operator: "Exists"
+      # cloud controller manager should be able to run on masters
+      # TODO: remove this once k8s <= 1.23 is no longer supported
+      - key: "node-role.kubernetes.io/master"
+        effect: NoSchedule
+      # k8s clusters 1.24+ use the control-plane name instead of master
+      - key: "node-role.kubernetes.io/control-plane"
+        effect: NoSchedule
+      serviceAccountName: cloud-controller-manager
+      containers:
+      - name: cloud-controller-manager
+        image: gcr.io/k8s-staging-cloud-provider-gcp/cloud-controller-manager@sha256:b98242f767595c3c137e63bd270ce4de625abcac398fcc105848f0d7dcd30c02
+        command: ['/cloud-controller-manager']
+        args:
+        # The --help output of the controller binary suggests that profiling is enabled by default
+        - --profiling=false
+        - --v=4
+        - --leader-elect=true
+        # We generate a ConfigMap for this file using Kustomize and apply it together with the CAPI manifests in the
+        # management cluster, then use it in KubeadmControlPlane.spec.kubeadmConfigSpec.files to have cloud-init
+        # write its contents to a file on control-plane nodes. See below for contents; we likely only needed to
+        # provide it explicitly to make Shared VPC work.
+        # - --cloud-config=/etc/kubernetes/cloud.config
+        # Default stuff
+        - --cloud-provider=gce
+        - --use-service-account-credentials=true
+        - --bind-address=127.0.0.1
+        - --secure-port=10258
+        # These took a bit of trial and error, most of them probably aren't universally applicable, as we run cilium without
+        # kube-proxy and use Shared VPC + Secondary VPC Ranges for "native" routing (https://docs.cilium.io/en/stable/network/concepts/routing/#google-cloud)
+        # - --cluster-name=my-cluster
+        # - --cluster-cidr=10.0.0.0/8
+        # - --allocate-node-cidrs=true
+        # - --configure-cloud-routes=false
+        # - --cidr-allocator-type=CloudAllocator
+        # - --controllers=cloud-node,cloud-node-lifecycle,nodeipam,service
+        env:
+        # This probably won't work when running HA controlplanes, but without kube-proxy we don't get DNS resolution
+        # for services until cilium is up and running, which doesn't happen until after CCM itself is deployed.
+        - name: KUBERNETES_SERVICE_HOST
+          value: "127.0.0.1"
+        - name: KUBERNETES_SERVICE_PORT
+          value: "6443"
+        # volumeMounts:
+        # - mountPath: /etc/kubernetes/cloud.config
+        #   name: cloudconfig
+        #   readOnly: true
+      # volumes:
+      # - hostPath:
+      #     path: /etc/kubernetes/cloud.config
+      #     type: ""
+      #   name: cloudconfig
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: cloud-controller-manager
+  namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system:cloud-controller-manager
+rules:
+- apiGroups:
+  - networking.gke.io
+  resources:
+  - network
+  verbs:
+  - get
+- apiGroups:
+  - networking.gke.io
+  resources:
+  - network/status
+  - gkenetworkparamset
+  - gkenetworkparamset/status
+  verbs:
+  - update
+  - get
+- apiGroups:
+  - ""
+  - events.k8s.io
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+  - update
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - create
+- apiGroups:
+  - coordination.k8s.io
+  resourceNames:
+  - cloud-controller-manager
+  resources:
+  - leases
+  verbs:
+  - get
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - endpoints
+  - serviceaccounts
+  verbs:
+  - create
+  - get
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - get
+  - update
+  - patch
+- apiGroups:
+  - ""
+  resources:
+  - namespaces
+  verbs:
+  - get
+- apiGroups:
+  - ""
+  resources:
+  - nodes/status
+  verbs:
+  - patch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - secrets
+  verbs:
+  - create
+  - delete
+  - get
+  - update
+- apiGroups:
+  - "authentication.k8s.io"
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - "*"
+  resources:
+  - "*"
+  verbs:
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - serviceaccounts/token
+  verbs:
+  - create
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system::leader-locking-cloud-controller-manager
+  namespace: kube-system
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  verbs:
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  resourceNames:
+  - cloud-controller-manager
+  verbs:
+  - get
+  - update
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system:controller:cloud-node-controller
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - get
+  - list
+  - update
+  - delete
+  - patch
+- apiGroups:
+  - ""
+  resources:
+  - nodes/status
+  verbs:
+  - get
+  - list
+  - update
+  - delete
+  - patch
+
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - list
+  - delete
+- apiGroups:
+  - ""
+  resources:
+  - pods/status
+  verbs:
+  - list
+  - delete
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system:controller:pvl-controller
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+  - update
+- apiGroups:
+  - ""
+  resources:
+  - persistentvolumeclaims
+  - persistentvolumes
+  verbs:
+  - list
+  - watch
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system::leader-locking-cloud-controller-manager
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: system::leader-locking-cloud-controller-manager
+subjects:
+- kind: ServiceAccount
+  name: cloud-controller-manager
+  namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system:cloud-controller-manager
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:cloud-controller-manager
+subjects:
+- kind: User
+  apiGroup: rbac.authorization.k8s.io
+  name: system:cloud-controller-manager
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    addonmanager.kubernetes.io/mode: Reconcile
+  name: system:controller:cloud-node-controller
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:controller:cloud-node-controller
+subjects:
+- kind: ServiceAccount
+  name: cloud-node-controller
+  namespace: kube-system
diff --git a/test/e2e/data/cni/calico/calico.yaml b/test/e2e/data/cni/calico/calico.yaml
index 8613b07782..5d669079b3 100644
--- a/test/e2e/data/cni/calico/calico.yaml
+++ b/test/e2e/data/cni/calico/calico.yaml
@@ -4950,6 +4950,8 @@ spec:
           effect: NoSchedule
         - key: node-role.kubernetes.io/control-plane
           effect: NoSchedule
+        - key: "node.cloudprovider.kubernetes.io/uninitialized"
+          effect: NoSchedule
       serviceAccountName: calico-kube-controllers
       priorityClassName: system-cluster-critical
       containers:
diff --git a/test/e2e/data/infrastructure-gcp/cluster-template-prow-ci-version.yaml b/test/e2e/data/infrastructure-gcp/cluster-template-prow-ci-version.yaml
index bae2291c88..f097957b73 100644
--- a/test/e2e/data/infrastructure-gcp/cluster-template-prow-ci-version.yaml
+++ b/test/e2e/data/infrastructure-gcp/cluster-template-prow-ci-version.yaml
@@ -5,6 +5,7 @@ metadata:
   name: "${CLUSTER_NAME}"
   labels:
     cni: "${CLUSTER_NAME}-crs-cni"
+    ccm: "${CLUSTER_NAME}-crs-ccm"
 spec:
   clusterNetwork:
     pods:
@@ -46,15 +47,30 @@ spec:
       nodeRegistration:
         name: '{{ ds.meta_data.local_hostname.split(".")[0] }}'
         kubeletExtraArgs:
-          cloud-provider: gce
+          cloud-provider: external
+        taints:
+        # this taint is set by all kubelets running `--cloud-provider=external`
+        # so we should tolerate it to schedule the gce ccm
+        - key: "node.cloudprovider.kubernetes.io/uninitialized"
+          value: "true"
+          effect: "NoSchedule"
+        - key: "CriticalAddonsOnly"
+          operator: "Exists"
+        # cloud controller manager should be able to run on masters
+        # TODO: remove this once k8s <= 1.23 is no longer supported
+        - key: "node-role.kubernetes.io/master"
+          effect: NoSchedule
+        # k8s clusters 1.24+ use the control-plane name instead of master
+        - key: "node-role.kubernetes.io/control-plane"
"node-role.kubernetes.io/control-plane" + effect: NoSchedule clusterConfiguration: apiServer: timeoutForControlPlane: 20m extraArgs: - cloud-provider: gce + cloud-provider: external controllerManager: extraArgs: - cloud-provider: gce + cloud-provider: external allocate-node-cidrs: "false" v: "4" kubernetesVersion: ci/${CI_VERSION} @@ -128,7 +144,22 @@ spec: nodeRegistration: name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' kubeletExtraArgs: - cloud-provider: gce + cloud-provider: external + taints: + # this taint is set by all kubelets running `--cloud-provider=external` + # so we should tolerate it to schedule the digitalocean ccm + - key: "node.cloudprovider.kubernetes.io/uninitialized" + value: "true" + effect: "NoSchedule" + - key: "CriticalAddonsOnly" + operator: "Exists" + # cloud controller manages should be able to run on masters + # TODO: remove this when ccm is not supported on k8s <= 1.23 + - key: "node-role.kubernetes.io/master" + effect: NoSchedule + # k8s clusters 1.24+ uses control-plane name instead of master + - key: "node-role.kubernetes.io/control-plane" + effect: NoSchedule postKubeadmCommands: [] preKubeadmCommands: - bash -c /tmp/kubeadm-bootstrap.sh @@ -255,7 +286,22 @@ spec: nodeRegistration: name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' kubeletExtraArgs: - cloud-provider: gce + cloud-provider: external + taints: + # this taint is set by all kubelets running `--cloud-provider=external` + # so we should tolerate it to schedule the digitalocean ccm + - key: "node.cloudprovider.kubernetes.io/uninitialized" + value: "true" + effect: "NoSchedule" + - key: "CriticalAddonsOnly" + operator: "Exists" + # cloud controller manages should be able to run on masters + # TODO: remove this when ccm is not supported on k8s <= 1.23 + - key: "node-role.kubernetes.io/master" + effect: NoSchedule + # k8s clusters 1.24+ uses control-plane name instead of master + - key: "node-role.kubernetes.io/control-plane" + effect: NoSchedule preKubeadmCommands: - bash -c /tmp/kubeadm-bootstrap.sh --- @@ -292,3 +338,22 @@ spec: resources: - name: "${CLUSTER_NAME}-crs-cni" kind: ConfigMap +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "${CLUSTER_NAME}-crs-ccm" +data: ${CCM_RESOURCES} +--- +apiVersion: addons.cluster.x-k8s.io/v1beta1 +kind: ClusterResourceSet +metadata: + name: "${CLUSTER_NAME}-crs-ccm" +spec: + strategy: ApplyOnce + clusterSelector: + matchLabels: + ccm: "${CLUSTER_NAME}-crs-ccm" + resources: + - name: "${CLUSTER_NAME}-crs-ccm" + kind: ConfigMap diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go index 8dff9d3a5d..1fe2d0c894 100644 --- a/test/e2e/suite_test.go +++ b/test/e2e/suite_test.go @@ -216,6 +216,12 @@ func createClusterctlLocalRepository(config *clusterctl.E2EConfig, repositoryFol Expect(cniPath).To(BeAnExistingFile(), "The %s variable should resolve to an existing file", capi_e2e.CNIPath) createRepositoryInput.RegisterClusterResourceSetConfigMapTransformation(cniPath, capi_e2e.CNIResources) + // Ensuring a CCM file is defined in the config and register a FileTransformation to inject the referenced file as in place of the CCM_RESOURCES envSubst variable. 
+	Expect(config.Variables).To(HaveKey(CCMPath), "Missing %s variable in the config", CCMPath)
+	ccmPath := config.GetVariable(CCMPath)
+	Expect(ccmPath).To(BeAnExistingFile(), "The %s variable should resolve to an existing file", CCMPath)
+	createRepositoryInput.RegisterClusterResourceSetConfigMapTransformation(ccmPath, CCMResources)
+
 	clusterctlConfig := clusterctl.CreateRepository(context.TODO(), createRepositoryInput)
 	Expect(clusterctlConfig).To(BeAnExistingFile(), "The clusterctl config file does not exists in the local repository %s", repositoryFolder)
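
Note on the commented-out --cloud-config flag: the DaemonSet comment above says the file could be delivered through KubeadmControlPlane.spec.kubeadmConfigSpec.files so cloud-init writes it onto control-plane nodes. The snippet below is a hedged illustration of that idea only, it is not part of this change; the project ID values and the gce.conf keys shown are placeholders/assumptions, not something this diff configures.

    # Illustrative sketch: deliver /etc/kubernetes/cloud.config to control-plane
    # nodes via kubeadmConfigSpec.files, matching the hostPath/volumeMount that is
    # commented out in the CCM DaemonSet above. Values are placeholders.
    apiVersion: controlplane.cluster.x-k8s.io/v1beta1
    kind: KubeadmControlPlane
    spec:
      kubeadmConfigSpec:
        files:
        - path: /etc/kubernetes/cloud.config
          owner: root:root
          permissions: "0640"
          content: |
            [global]
            project-id = my-gcp-project                # placeholder
            network-project-id = my-host-vpc-project   # placeholder; relevant for Shared VPC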