Integrate metal-ccm.

Gerrit91 committed Jan 23, 2025
1 parent c22e1ca commit 5c58ca5
Showing 6 changed files with 248 additions and 14 deletions.
10 changes: 2 additions & 8 deletions CONTRIBUTING.md
@@ -42,10 +42,10 @@ When the control plane node was provisioned, you can obtain the kubeconfig like:
kubectl get secret metal-test-kubeconfig -o jsonpath='{.data.value}' | base64 -d > .capms-cluster-kubeconfig.yaml
```

-For now, the provider ID has to be manually added to the node object because we did not integrate the [metal-ccm](https://github.com/metal-stack/metal-ccm) yet:
+The node's provider ID is provided by the [metal-ccm](https://github.com/metal-stack/metal-ccm), which needs to be deployed into the cluster:

```bash
-kubectl --kubeconfig=.capms-cluster-kubeconfig.yaml patch node <control-plane-node-name> --patch='{"spec":{"providerID": "metal://<machine-id>"}}'
+make deploy-metal-ccm
```
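
Once the CCM has initialized the node, the provider ID appears in the node's spec. A quick way to verify this (just a sketch, assuming the kubeconfig path from above):

```bash
kubectl --kubeconfig=.capms-cluster-kubeconfig.yaml get nodes \
  -o custom-columns=NAME:.metadata.name,PROVIDER-ID:.spec.providerID
```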

Next, a CNI is expected to be deployed to the cluster:
@@ -77,12 +77,6 @@ EOF
> [!NOTE]
> Ideally, Calico should be configured with BGP (no overlay), eBPF, and DSR. An example will be added to this repository at a later point in time.
-As soon as the worker node was provisioned, the same provider ID patch as above is required:
-
-```bash
-kubectl --kubeconfig=.capms-cluster-kubeconfig.yaml patch node <worker-node-name> --patch='{"spec":{"providerID": "metal://<machine-id>"}}'
-```

That's it!

### To Deploy on the cluster
4 changes: 4 additions & 0 deletions Makefile
@@ -64,6 +64,10 @@ push-to-capi-lab: generate manifests build install deploy
kubectl --kubeconfig=$(KUBECONFIG) patch deployments.apps -n capms-system capms-controller-manager --patch='{"spec":{"template":{"spec":{"containers":[{"name": "manager","imagePullPolicy":"IfNotPresent","image":"$(IMG)"}]}}}}'
kubectl --kubeconfig=$(KUBECONFIG) delete pod -n capms-system -l control-plane=controller-manager

.PHONY: deploy-metal-ccm
deploy-metal-ccm:
cat capi-lab/metal-ccm.yaml | envsubst | kubectl --kubeconfig=.capms-cluster-kubeconfig.yaml apply -f -

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
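
The new `deploy-metal-ccm` target pipes the manifest through `envsubst`, so the metal-api credentials referenced in `capi-lab/metal-ccm.yaml` need to be set in the environment when it runs. A usage sketch with placeholder values (the lab environment may already export these):

```bash
export METAL_API_URL=<metal-api-url>
export METAL_API_HMAC=<metal-api-hmac>
make deploy-metal-ccm
```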
6 changes: 6 additions & 0 deletions capi-lab/firewall-rules.yaml
@@ -13,6 +13,12 @@ egress:
protocol: TCP
to:
- 0.0.0.0/0
- comment: allow outgoing traffic to control plane for ccm
ports:
- 8080
protocol: TCP
to:
- 172.16.0.0/12 # Docker Networks
- comment: allow outgoing DNS and NTP traffic via UDP
ports:
- 53
201 changes: 201 additions & 0 deletions capi-lab/metal-ccm.yaml
@@ -0,0 +1,201 @@
---
apiVersion: v1
kind: Secret
metadata:
name: cloud-controller-manager
namespace: kube-system
stringData:
api-url: ${METAL_API_URL}
api-hmac: ${METAL_API_HMAC}
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: cloud-controller-manager
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cloud-controller-manager
rules:
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- update
- apiGroups:
- ""
resources:
- nodes
verbs:
- "*"
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- apiGroups:
- ""
resources:
- services
- services/status
- endpoints
verbs:
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts
- serviceaccounts/token
verbs:
- create
- get
- list
- watch
- apiGroups:
- ""
resources:
- secrets
- configmaps
verbs:
- get
- list
- watch
- update
- create
- patch
- delete
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- get
- create
- update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloud-controller-manager
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cloud-controller-manager
subjects:
- kind: ServiceAccount
name: cloud-controller-manager
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloud-controller-manager
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cloud-controller-manager
subjects:
- kind: ServiceAccount
name: cloud-controller-manager
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: cloud-controller-manager
name: cloud-controller-manager
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
app: cloud-controller-manager
strategy:
type: RollingUpdate
template:
metadata:
labels:
app: cloud-controller-manager
spec:
containers:
- command:
- ./metal-cloud-controller-manager
- --cluster-cidr=10.240.0.0/12
- --cluster-name=
- --concurrent-service-syncs=10
- --leader-elect=true
- --secure-port=10258
- --use-service-account-credentials
- --v=2
env:
- name: METAL_API_URL
valueFrom:
secretKeyRef:
key: api-url
name: cloud-controller-manager
- name: METAL_AUTH_HMAC
valueFrom:
secretKeyRef:
key: api-hmac
name: cloud-controller-manager
- name: METAL_PROJECT_ID
value: 00000000-0000-0000-0000-000000000001
- name: METAL_PARTITION_ID
value: mini-lab
# associates service type load balancer ips with this cluster:
- name: METAL_CLUSTER_ID
value: 313f567a-2663-4310-8b0f-2415952c6c63
- name: METAL_DEFAULT_EXTERNAL_NETWORK_ID
value: internet-mini-lab
- name: METAL_ADDITIONAL_NETWORKS
value: internet-mini-lab,feb8021c-fec0-4e11-8242-d7e13e7845ca
- name: METAL_SSH_PUBLICKEY
value: ""
image: ghcr.io/metal-stack/metal-ccm:v0.9.3
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 2
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 15
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 15
name: cloud-controller-manager
resources:
limits:
cpu: 250m
memory: 256Mi
requests:
cpu: 100m
memory: 64Mi
nodeSelector:
node-role.kubernetes.io/control-plane: ""
hostNetwork: true
serviceAccountName: cloud-controller-manager
tolerations:
- effect: NoSchedule
operator: Exists
key: node-role.kubernetes.io/control-plane
- effect: NoSchedule
key: node.cloudprovider.kubernetes.io/uninitialized
value: "true"
restartPolicy: Always
volumes:
- name: cloud-controller-manager
secret:
defaultMode: 420
secretName: cloud-controller-manager
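
Once applied, the CCM's progress (node initialization, service reconciliation) can be followed through its logs, for example:

```bash
kubectl --kubeconfig=.capms-cluster-kubeconfig.yaml -n kube-system \
  logs deployment/cloud-controller-manager --follow
```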
24 changes: 21 additions & 3 deletions config/samples/example-kubeadm.yaml
@@ -61,14 +61,24 @@ spec:
format: ignition
clusterConfiguration:
controlPlaneEndpoint: 203.0.113.129:443
apiServer:
extraArgs:
cloud-provider: external
controllerManager:
extraArgs:
cloud-provider: external
initConfiguration:
localAPIEndpoint:
advertiseAddress: 127.0.0.1
bindPort: 443
-nodeRegistration: {}
+nodeRegistration:
+  kubeletExtraArgs:
+    cloud-provider: external
joinConfiguration:
controlPlane: {}
-nodeRegistration: {}
+nodeRegistration:
+  kubeletExtraArgs:
+    cloud-provider: external
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineDeployment
@@ -113,5 +123,13 @@ spec:
format: ignition
clusterConfiguration:
controlPlaneEndpoint: 203.0.113.129:443
apiServer:
extraArgs:
cloud-provider: external
controllerManager:
extraArgs:
cloud-provider: external
joinConfiguration:
-nodeRegistration: {}
+nodeRegistration:
+  kubeletExtraArgs:
+    cloud-provider: external
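
With `cloud-provider: external`, the kubelet registers each node with the `node.cloudprovider.kubernetes.io/uninitialized` taint, which the CCM removes once it has initialized the node (the deployment above tolerates exactly this taint). A quick check for nodes still awaiting initialization:

```bash
kubectl --kubeconfig=.capms-cluster-kubeconfig.yaml get nodes \
  -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints
```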
17 changes: 14 additions & 3 deletions internal/controller/metalstackmachine_controller.go
@@ -43,6 +43,7 @@ import (
ipmodels "github.com/metal-stack/metal-go/api/client/ip"
metalmachine "github.com/metal-stack/metal-go/api/client/machine"
"github.com/metal-stack/metal-go/api/models"
"github.com/metal-stack/metal-lib/pkg/pointer"
"github.com/metal-stack/metal-lib/pkg/tag"
)

@@ -231,7 +232,7 @@ func (r *machineReconciler) reconcile() error {
return err
}

-r.infraMachine.Spec.ProviderID = "metal://" + *m.ID
+r.infraMachine.Spec.ProviderID = encodeProviderID(m)

err = helper.Patch(r.ctx, r.infraMachine) // TODO: check whether the patch is skipped when no changes occur
if err != nil {
@@ -354,7 +355,7 @@ func (r *machineReconciler) status() error {
conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineHealthy, "NotHealthy", clusterv1.ConditionSeverityWarning, "machine not created")
conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineReady, "NotReady", clusterv1.ConditionSeverityWarning, "machine not created")
default:
-if r.infraMachine.Spec.ProviderID == "metal://"+*m.ID {
+if r.infraMachine.Spec.ProviderID == encodeProviderID(m) {
conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineCreated)
} else {
conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineCreated, "NotSet", clusterv1.ConditionSeverityWarning, "provider id was not yet patched into the machine's spec")
@@ -440,7 +441,7 @@ func (r *machineReconciler) status() error {

func (r *machineReconciler) findProviderMachine() (*models.V1MachineResponse, error) {
mfr := &models.V1MachineFindRequest{
-ID: strings.TrimPrefix(r.infraMachine.Spec.ProviderID, "metal://"),
+ID: decodeProviderID(r.infraMachine.Spec.ProviderID),
AllocationProject: r.infraCluster.Spec.ProjectID,
Tags: r.machineTags(),
}
@@ -473,3 +474,13 @@ func (r *machineReconciler) machineTags() []string {

return tags
}

func encodeProviderID(m *models.V1MachineResponse) string {
return fmt.Sprintf("metal://%s/%s", pointer.SafeDeref(pointer.SafeDeref(m.Partition).ID), pointer.SafeDeref(m.ID))
}

func decodeProviderID(id string) string {
withPartition := strings.TrimPrefix(id, "metal://")
_, res, _ := strings.Cut(withPartition, "/")
return res
}
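
For reference, the provider ID format changes here from `metal://<machine-id>` to `metal://<partition-id>/<machine-id>`, presumably matching what the metal-ccm sets on nodes. A minimal shell sketch of the decoding, mirroring `decodeProviderID` (the ID is a placeholder):

```bash
provider_id="metal://mini-lab/<machine-id>"
# drop the scheme plus everything up to and including the first slash,
# like strings.TrimPrefix followed by strings.Cut in decodeProviderID:
machine_id="${provider_id#metal://*/}"
echo "${machine_id}"  # prints: <machine-id>
```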
