From fd7e20baa14c8edf57fa62584b07c0c2a2db71e3 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 21 Nov 2024 12:49:52 +0100 Subject: [PATCH] Ignition service for bootstrapping kubeadm and kubelet. (#8) * Ignition service for bootstrapping kubeadm and kubelet. * Provide containerd config. * Deploy more stuff from Ansible. * Typo * Install as binary, GPG does not work easily without tty. --- README.md | 4 +- capi-lab/deploy.yaml | 8 ++ capi-lab/firewall-rules.yaml | 23 ++++ capi-lab/requirements.yaml | 2 +- .../defaults/main.yaml | 2 + .../tasks/main.yaml | 29 +++++ capi-lab/roles/prometheus/defaults/main.yaml | 3 + capi-lab/roles/prometheus/tasks/main.yaml | 22 ++++ config/samples/example-kubeadm.yaml | 121 ++++++++++++++++++ config/samples/example.yaml | 61 --------- config/samples/kustomization.yaml | 2 +- .../metalstackcluster_controller.go | 18 ++- 12 files changed, 222 insertions(+), 73 deletions(-) create mode 100644 capi-lab/firewall-rules.yaml create mode 100644 capi-lab/roles/firewall-controller-manager/defaults/main.yaml create mode 100644 capi-lab/roles/firewall-controller-manager/tasks/main.yaml create mode 100644 capi-lab/roles/prometheus/defaults/main.yaml create mode 100644 capi-lab/roles/prometheus/tasks/main.yaml create mode 100644 config/samples/example-kubeadm.yaml delete mode 100644 config/samples/example.yaml diff --git a/README.md b/README.md index 055f767..4e15f1b 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,7 @@ ```bash make -C capi-lab eval $(make -C capi-lab --silent dev-env) -kubectl apply -f ../firewall-controller-manager/config/crds -kubectl create -f https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.77.1/bundle.yaml -clusterctl init +EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION=true clusterctl init make push-to-capi-lab kubectl apply -k config/samples diff --git a/capi-lab/deploy.yaml b/capi-lab/deploy.yaml index cc1d482..13c3165 100644 --- a/capi-lab/deploy.yaml +++ b/capi-lab/deploy.yaml @@ -3,8 +3,16 @@ hosts: localhost connection: local gather_facts: false + vars: + setup_yaml: + - url: https://raw.githubusercontent.com/metal-stack/releases/develop/release.yaml + meta_var: metal_stack_release roles: - name: ansible-common - name: metal-roles - name: cert-manager + - name: prometheus + - name: firewall-controller-manager + vars: + firewall_controller_manager_namespace: cap-metal-stack - name: cluster-api-provider-metal-stack diff --git a/capi-lab/firewall-rules.yaml b/capi-lab/firewall-rules.yaml new file mode 100644 index 0000000..22a406f --- /dev/null +++ b/capi-lab/firewall-rules.yaml @@ -0,0 +1,23 @@ +egress: +- comment: allow outgoing traffic for HTTP and HTTPS and DNS + ports: + - 443 + - 80 + - 53 + protocol: TCP + to: + - 0.0.0.0/0 +- comment: allow outgoing DNS and NTP via UDP + ports: + - 53 + - 123 + protocol: UDP + to: + - 0.0.0.0/0 +ingress: +- comment: allow incoming HTTPS to kube-apiserver + ports: + - 443 + protocol: TCP + from: + - 0.0.0.0/0 diff --git a/capi-lab/requirements.yaml b/capi-lab/requirements.yaml index f3cdbec..f3409b5 100644 --- a/capi-lab/requirements.yaml +++ b/capi-lab/requirements.yaml @@ -6,4 +6,4 @@ version: v0.2.9 - src: https://github.com/metal-stack/metal-roles name: metal-roles - version: v0.14.2 + version: v0.14.5 diff --git a/capi-lab/roles/firewall-controller-manager/defaults/main.yaml b/capi-lab/roles/firewall-controller-manager/defaults/main.yaml new file mode 100644 index 0000000..6bb9002 --- /dev/null +++ b/capi-lab/roles/firewall-controller-manager/defaults/main.yaml @@ -0,0 +1,2 @@ +--- +firewall_controller_manager_namespace: "firewall-controller-manager" diff --git a/capi-lab/roles/firewall-controller-manager/tasks/main.yaml b/capi-lab/roles/firewall-controller-manager/tasks/main.yaml new file mode 100644 index 0000000..0a35dd7 --- /dev/null +++ b/capi-lab/roles/firewall-controller-manager/tasks/main.yaml @@ -0,0 +1,29 @@ +--- +- name: Gather release versions + setup_yaml: + +- name: Create namespace + k8s: + definition: + apiVersion: v1 + kind: Namespace + metadata: + name: "{{ firewall_controller_manager_namespace }}" + labels: + name: "{{ firewall_controller_manager_namespace }}" + +- name: Deploy firewall-controller-manager CRDs + k8s: + definition: "{{ lookup('url', 'https://raw.githubusercontent.com/metal-stack/firewall-controller-manager/refs/tags/' + firewall_controller_manager_image_tag + '/config/crds/' + item, split_lines=False) }}" + namespace: "{{ firewall_controller_manager_namespace }}" + loop: + - firewall.metal-stack.io_firewalldeployments.yaml + - firewall.metal-stack.io_firewallmonitors.yaml + - firewall.metal-stack.io_firewalls.yaml + - firewall.metal-stack.io_firewallsets.yaml + +# - name: Deploy firewall-controller-manager +# k8s: +# definition: + +# namespace: "{{ firewall_controller_manager_namespace }}" diff --git a/capi-lab/roles/prometheus/defaults/main.yaml b/capi-lab/roles/prometheus/defaults/main.yaml new file mode 100644 index 0000000..fcec798 --- /dev/null +++ b/capi-lab/roles/prometheus/defaults/main.yaml @@ -0,0 +1,3 @@ +--- +prometheus_namespace: prometheus +prometheus_helm_chart_version: "66.2.1" diff --git a/capi-lab/roles/prometheus/tasks/main.yaml b/capi-lab/roles/prometheus/tasks/main.yaml new file mode 100644 index 0000000..0e98806 --- /dev/null +++ b/capi-lab/roles/prometheus/tasks/main.yaml @@ -0,0 +1,22 @@ +--- +- name: Create namespace + k8s: + definition: + apiVersion: v1 + kind: Namespace + metadata: + name: "{{ prometheus_namespace }}" + labels: + name: "{{ prometheus_namespace }}" + +- name: Deploy Prometheus + include_role: + name: ansible-common/roles/helm-chart + vars: + helm_chart: kube-prometheus-stack + helm_repo: https://prometheus-community.github.io/helm-charts + helm_force: false + helm_release_name: kube-prometheus-stack + helm_target_namespace: "{{ prometheus_namespace }}" + helm_chart_version: "{{ prometheus_helm_chart_version }}" + # helm_value_file_template: values.yaml diff --git a/config/samples/example-kubeadm.yaml b/config/samples/example-kubeadm.yaml new file mode 100644 index 0000000..8642605 --- /dev/null +++ b/config/samples/example-kubeadm.yaml @@ -0,0 +1,121 @@ +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: metal-test +spec: + clusterNetwork: + pods: + cidrBlocks: ["192.168.0.0/16"] + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: metal-test-controlplane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 + kind: MetalStackCluster + name: metal-test +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 +kind: MetalStackCluster +metadata: + name: metal-test +spec: + projectID: 00000000-0000-0000-0000-000000000001 + partition: mini-lab + firewall: + size: v1-small-x86 + image: firewall-ubuntu-3.0 + networks: + - internet-mini-lab +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 +kind: MetalStackMachineTemplate +metadata: + name: metal-test-controlplane +spec: + template: + spec: + image: ubuntu-24.04 + size: v1-small-x86 +--- +kind: KubeadmControlPlane +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +metadata: + name: metal-test-controlplane +spec: + replicas: 1 + version: v1.30.6 + machineTemplate: + nodeDrainTimeout: 10m + infrastructureRef: + kind: MetalStackMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 + name: metal-test-controlplane + kubeadmConfigSpec: + format: ignition + clusterConfiguration: + controlPlaneEndpoint: 203.0.113.129:443 + initConfiguration: + localAPIEndpoint: + advertiseAddress: 10.0.0.2 + bindPort: 443 + nodeRegistration: {} + joinConfiguration: + controlPlane: {} + nodeRegistration: {} + ignition: + containerLinuxConfig: + additionalConfig: | + systemd: + units: + - name: cluster-api-init.service + enable: true + contents: |- + [Unit] + Description=Prepares the node for bootstrapping with cluster-api kubeadm + Before=kubeadm.service + After=network-online.target + Wants=network-online.target + [Service] + Type=oneshot + Restart=on-failure + RestartSec=5 + StartLimitBurst=0 + EnvironmentFile=/etc/environment + ExecStart=/var/lib/cluster-api-init/bootstrap.sh + [Install] + WantedBy=multi-user.target + files: + - path: /var/lib/cluster-api-init/bootstrap.sh + owner: "root:root" + permissions: "0744" + content: | + #!/usr/bin/env bash + set -eo pipefail + set +x + + apt update + apt install conntrack + + CNI_PLUGINS_VERSION="v1.3.0" + DEST="/opt/cni/bin" + mkdir -p "$DEST" + curl -L "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-amd64-${CNI_PLUGINS_VERSION}.tgz" | sudo tar -C "$DEST" -xz + + RELEASE="v1.30.6" + cd /usr/local/bin + sudo curl -L --remote-name-all https://dl.k8s.io/release/${RELEASE}/bin/linux/amd64/{kubeadm,kubelet,kubectl} + sudo chmod +x {kubeadm,kubelet,kubectl} + + RELEASE_VERSION="v0.16.2" + curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubelet/kubelet.service" | sed "s:/usr/bin:/usr/local/bin:g" | sudo tee /usr/lib/systemd/system/kubelet.service + sudo mkdir -p /usr/lib/systemd/system/kubelet.service.d + curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubeadm/10-kubeadm.conf" | sed "s:/usr/bin:/usr/local/bin:g" | sudo tee /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf + + systemctl enable kubelet.service + - path: /etc/containerd/config.toml + owner: "root:root" + permissions: "0644" + content: | + disabled_plugins = [] diff --git a/config/samples/example.yaml b/config/samples/example.yaml deleted file mode 100644 index 948c757..0000000 --- a/config/samples/example.yaml +++ /dev/null @@ -1,61 +0,0 @@ ---- -apiVersion: cluster.x-k8s.io/v1beta1 -kind: Cluster -metadata: - name: metal-test -spec: - clusterNetwork: - pods: - cidrBlocks: ["192.168.0.0/16"] - controlPlaneRef: - apiVersion: controlplane.cluster.x-k8s.io/v1beta1 - kind: KubeadmControlPlane - name: metal-test-controlplane - infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 - kind: MetalStackCluster - name: metal-test ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 -kind: MetalStackCluster -metadata: - name: metal-test -spec: - projectID: 00000000-0000-0000-0000-000000000001 - partition: mini-lab - firewall: - size: v1-small-x86 - image: firewall-ubuntu-3.0 - networks: - - internet-mini-lab ---- -kind: KubeadmControlPlane -apiVersion: controlplane.cluster.x-k8s.io/v1beta1 -metadata: - name: metal-test-controlplane -spec: - kubeadmConfigSpec: - format: ignition - initConfiguration: - nodeRegistration: {} - joinConfiguration: - controlPlane: {} - nodeRegistration: {} - machineTemplate: - nodeDrainTimeout: 10m - infrastructureRef: - kind: MetalStackMachineTemplate - apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 - name: metal-test-controlplane - replicas: 1 - version: v1.30.6 ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 -kind: MetalStackMachineTemplate -metadata: - name: metal-test-controlplane -spec: - template: - spec: - image: ubuntu-24.04 - size: v1-small-x86 diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 2767e50..2291d43 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -2,5 +2,5 @@ namespace: default resources: -- example.yaml +- example-kubeadm.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/internal/controller/metalstackcluster_controller.go b/internal/controller/metalstackcluster_controller.go index 9b88bf1..ac33d47 100644 --- a/internal/controller/metalstackcluster_controller.go +++ b/internal/controller/metalstackcluster_controller.go @@ -166,12 +166,12 @@ func (r *MetalStackClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { } func (r *clusterReconciler) reconcile() error { - nodeCIDR, err := r.ensureNodeNetwork() + nodeNetworkID, err := r.ensureNodeNetwork() if err != nil { return fmt.Errorf("unable to ensure node network: %w", err) } - r.log.Info("reconciled node network", "cidr", nodeCIDR) + r.log.Info("reconciled node network", "network-id", nodeNetworkID) ip, err := r.ensureControlPlaneIP() if err != nil { @@ -197,7 +197,7 @@ func (r *clusterReconciler) reconcile() error { return fmt.Errorf("failed to update infra cluster control plane endpoint: %w", err) } - fwdeploy, err := r.ensureFirewallDeployment(nodeCIDR) + fwdeploy, err := r.ensureFirewallDeployment(nodeNetworkID) if err != nil { return fmt.Errorf("unable to ensure firewall deployment: %w", err) } @@ -259,7 +259,7 @@ func (r *clusterReconciler) ensureNodeNetwork() (string, error) { return "", fmt.Errorf("error creating node network: %w", err) } - return resp.Payload.Prefixes[0], nil + return *resp.Payload.ID, nil case 1: nw := nws[0] @@ -267,7 +267,7 @@ func (r *clusterReconciler) ensureNodeNetwork() (string, error) { return "", errors.New("node network exists but the prefix is gone") } - return nw.Prefixes[0], nil + return *nw.ID, nil default: return "", fmt.Errorf("more than a single node network exists for this cluster, operator investigation is required") } @@ -399,7 +399,7 @@ func (r *clusterReconciler) findControlPlaneIP() ([]*models.V1IPResponse, error) return resp.Payload, nil } -func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.FirewallDeployment, error) { +func (r *clusterReconciler) ensureFirewallDeployment(nodeNetworkID string) (*fcmv2.FirewallDeployment, error) { deploy := &fcmv2.FirewallDeployment{ ObjectMeta: metav1.ObjectMeta{ Name: r.infraCluster.Name, @@ -440,7 +440,7 @@ func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.Fi deploy.Spec.Template.Spec.Size = r.infraCluster.Spec.Firewall.Size deploy.Spec.Template.Spec.Image = r.infraCluster.Spec.Firewall.Image - deploy.Spec.Template.Spec.Networks = append(r.infraCluster.Spec.Firewall.AdditionalNetworks, nodeCIDR) + deploy.Spec.Template.Spec.Networks = append(r.infraCluster.Spec.Firewall.AdditionalNetworks, nodeNetworkID) deploy.Spec.Template.Spec.RateLimits = r.infraCluster.Spec.Firewall.RateLimits deploy.Spec.Template.Spec.EgressRules = r.infraCluster.Spec.Firewall.EgressRules deploy.Spec.Template.Spec.LogAcceptedConnections = ptr.Deref(r.infraCluster.Spec.Firewall.LogAcceptedConnections, false) @@ -452,6 +452,10 @@ func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.Fi deploy.Spec.Template.Spec.NftablesExporterVersion = "" deploy.Spec.Template.Spec.NftablesExporterURL = "" + // TODO: we need to allow internet connection for the nodes before the firewall-controller can connect to the control-plane + // the FCM currently does not support this + deploy.Spec.Template.Spec.Userdata = "" + // TODO: do we need to generate ssh keys for the machines and the firewall in this controller? deploy.Spec.Template.Spec.SSHPublicKeys = nil