Skip to content

Commit

Permalink
feat(biohazard)!: Talos network config for new Fortinet stack, add Mu…
Browse files Browse the repository at this point in the history
…ltus NetworkAttachments

- Multus: switch HASS & KubeVirt VMs to use new NetworkAttachments

- Talos: the FortiSwitch can only do tagged VLANs on its direct 1G ports. The 10G unmanaged switch (TL-ST1008F) is downstream of the FortiSwitch, and VLAN-tagged packets (including ARP) coming from the TL-ST1008F are mysteriously dropped before they reach other hosts or the upstream FortiSwitch

(but tagged VLANs worked when TL-ST1008F's upstream was PVE OVS bridge with OPNsense VM and VLANs? lolwut)
  • Loading branch information
JJGadgets committed Jun 21, 2024
1 parent fb3a8f8 commit 28875fc
Show file tree
Hide file tree
Showing 9 changed files with 208 additions and 139 deletions.
237 changes: 114 additions & 123 deletions kube/clusters/biohazard/talos/talconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,67 +33,80 @@ nodes:
installDiskSelector:
size: "<= 600GB"
type: "nvme"
nameservers: ["${IP_HOME_DNS}"]
nameservers: ["${IP_ROUTER_VLAN_K8S}"]
disableSearchDomain: true
networkInterfaces:
- &m720q-net
mtu: 9000 # TODO: switch to 9000 once M720q risers arrive
interface: bond0
mtu: 9000
dhcp: false
deviceSelector:
# onboard I219-V NIC
# driver: "e1000e"
# hardwareAddr: "*:9a"
# TODO: switch to Mellanox once M720q risers arrive
driver: "mlx4_core"
hardwareAddr: "*:80"
bond: &bond0
mode: active-backup
# MII link-monitoring option; the key must be spelled `miimon` (same as bond1 below)
miimon: 100
#primary: enp1s0 # TODO: wen eta Talos bond primary device selector
#primary_reselect: better
deviceSelectors:
# Mellanox ConnectX (enp1s0)
- driver: "mlx4_core"
hardwareAddr: "*:80"
addresses: ["${IP_ROUTER_VLAN_K8S_PREFIX}1/28"]
routes:
- network: "${IP_ROUTER_VLAN_K8S_CIDR}"
metric: 1
- network: "0.0.0.0/0"
gateway: "${IP_ROUTER_VLAN_K8S}"
#vip:
# ip: "${IP_CLUSTER_VIP}"
- &m720q-bond1
interface: bond1
bond:
mode: active-backup
miimon: 100
deviceSelectors:
- driver: e1000e
- &m720q-br0
interface: br0
mtu: 1500
dhcp: true # native/untagged VLAN will be either undefined or an empty VLAN with no DHCP in prod, this is for emergency/recovery use
bridge:
interfaces: [bond1]
stp: {enabled: true}
vlans:
- &m720q-v58
vlanId: 58
mtu: 9000
- &vlan-active-directory
vlanId: 888
mtu: 1500
dhcp: false
addresses: ["${IP_ROUTER_VLAN_K8S_PREFIX}1/28"]
routes:
- network: "${IP_ROUTER_VLAN_K8S_CIDR}"
metric: 1
- network: "0.0.0.0/0"
gateway: "${IP_ROUTER_VLAN_K8S}"
#vip:
# ip: "${IP_CLUSTER_VIP}"
- &vlan-ceph
vlanId: 678
mtu: 9000
- &vlan-iot
vlanId: 227
mtu: 1500
dhcp: false
addresses: ["${IP_VLAN_CEPH_PREFIX}1/28"]
routes:
- network: "${IP_VLAN_CEPH_CIDR}"
metric: 1
machineFiles:
- &ts
op: create
path: /var/etc/tailscale/auth.env
permissions: 0o600
content: |
TS_KUBE_SECRET=""
TS_STATE_DIR=/var/lib/tailscale
TS_USERSPACE=false
TS_ACCEPT_DNS=false
- &tsNode
op: append
path: /var/etc/tailscale/auth.env
permissions: 0o600
content: |
TS_HOSTNAME=ange-talos
TS_EXTRA_ARGS=--accept-routes=false --netfilter-mode=off --snat-subnet-routes=false --advertise-tags=tag:talos,tag:ange
TS_AUTHKEY=${SECRET_TAILSCALE_TALOS_AUTHKEY_ANGE}
TS_ROUTES=${IP_ROUTER_VLAN_K8S_PREFIX}1/32
# machineFiles:
# - &ts
# op: create
# path: /var/etc/tailscale/auth.env
# permissions: 0o600
# content: |
# TS_KUBE_SECRET=""
# TS_STATE_DIR=/var/lib/tailscale
# TS_USERSPACE=false
# TS_ACCEPT_DNS=false
# - &tsNode
# op: append
# path: /var/etc/tailscale/auth.env
# permissions: 0o600
# content: |
# TS_HOSTNAME=ange-talos
# TS_EXTRA_ARGS=--accept-routes=false --netfilter-mode=off --snat-subnet-routes=false --advertise-tags=tag:talos,tag:ange
# TS_AUTHKEY=${SECRET_TAILSCALE_TALOS_AUTHKEY_ANGE}
# TS_ROUTES=${IP_ROUTER_VLAN_K8S_PREFIX}1/32
schematic:
customization:
systemExtensions:
officialExtensions:
- siderolabs/i915-ucode
- siderolabs/intel-ucode
- siderolabs/iscsi-tools
- siderolabs/tailscale
#- siderolabs/tailscale
inlinePatch:
machine:
sysfs:
Expand All @@ -105,44 +118,46 @@ nodes:
ipAddress: "${IP_ROUTER_VLAN_K8S_PREFIX}2"
networkInterfaces:
- <<: *m720q-net
deviceSelector:
driver: "mlx4_core"
hardwareAddr: "*:6a"
vlans:
- <<: *m720q-v58
addresses: ["${IP_ROUTER_VLAN_K8S_PREFIX}2/28"]
- <<: *vlan-ceph
addresses: ["${IP_VLAN_CEPH_PREFIX}2/28"]
machineFiles:
- *ts
- <<: *tsNode
content: |
TS_HOSTNAME=charlotte-talos
TS_EXTRA_ARGS=--accept-routes=false --netfilter-mode=off --snat-subnet-routes=false --advertise-tags=tag:talos,tag:charlotte
TS_AUTHKEY=${SECRET_TAILSCALE_TALOS_AUTHKEY_CHARLOTTE}
TS_ROUTES=${IP_ROUTER_VLAN_K8S_PREFIX}2/32
addresses: ["${IP_ROUTER_VLAN_K8S_PREFIX}2/28"]
bond:
<<: *bond0
deviceSelectors:
# Mellanox ConnectX (enp1s0)
- driver: "mlx4_core"
hardwareAddr: "*:6a"
# Reuse the shared bond1 interface; its anchor is `&m720q-bond1` (there is no `m720q-bond0` anchor)
- *m720q-bond1
- *m720q-br0
# machineFiles:
# - *ts
# - <<: *tsNode
# content: |
# TS_HOSTNAME=charlotte-talos
# TS_EXTRA_ARGS=--accept-routes=false --netfilter-mode=off --snat-subnet-routes=false --advertise-tags=tag:talos,tag:charlotte
# TS_AUTHKEY=${SECRET_TAILSCALE_TALOS_AUTHKEY_CHARLOTTE}
# TS_ROUTES=${IP_ROUTER_VLAN_K8S_PREFIX}2/32

- <<: *m720q # TODO: this is cp2 but bare metal, only apply after PVE converted to Talos
hostname: "chise.${DNS_CLUSTER}" # M720q, i3-8100T 4C4T, 32GB RAM, 512GB OS NVMe
ipAddress: "${IP_ROUTER_VLAN_K8S_PREFIX}3"
networkInterfaces:
- <<: *m720q-net
deviceSelector:
driver: "mlx4_core"
hardwareAddr: "*:40"
vlans:
- <<: *m720q-v58
addresses: ["${IP_ROUTER_VLAN_K8S_PREFIX}3/28"]
- <<: *vlan-ceph
addresses: ["${IP_VLAN_CEPH_PREFIX}3/28"]
machineFiles:
- *ts
- <<: *tsNode
content: |
TS_HOSTNAME=chise-talos
TS_EXTRA_ARGS=--accept-routes=false --netfilter-mode=off --snat-subnet-routes=false --advertise-tags=tag:talos,tag:chise
TS_AUTHKEY=${SECRET_TAILSCALE_TALOS_AUTHKEY_CHISE}
TS_ROUTES=${IP_ROUTER_VLAN_K8S_PREFIX}3/32
addresses: ["${IP_ROUTER_VLAN_K8S_PREFIX}3/28"]
bond:
<<: *bond0
deviceSelectors:
# Mellanox ConnectX (enp1s0)
- driver: "mlx4_core"
hardwareAddr: "*:40"
# Reuse the shared bond1 interface; its anchor is `&m720q-bond1` (there is no `m720q-bond0` anchor)
- *m720q-bond1
- *m720q-br0
# machineFiles:
# - *ts
# - <<: *tsNode
# content: |
# TS_HOSTNAME=chise-talos
# TS_EXTRA_ARGS=--accept-routes=false --netfilter-mode=off --snat-subnet-routes=false --advertise-tags=tag:talos,tag:chise
# TS_AUTHKEY=${SECRET_TAILSCALE_TALOS_AUTHKEY_CHISE}
# TS_ROUTES=${IP_ROUTER_VLAN_K8S_PREFIX}3/32
inlinePatch:
machine:
sysfs:
Expand All @@ -161,12 +176,6 @@ patches:
#- SUBSYSTEM=="drm", KERNEL=="renderD*", GROUP="44", MODE="0660"
- SUBSYSTEM=="drm", GROUP="44", MODE="0660"
- &kubeletExtraArgs |-
- op: add
path: /machine/kubelet/extraArgs
value:
feature-gates: GracefulNodeShutdown=true,ServerSideApply=true

- &machinePatch |-
machine:
install:
Expand All @@ -189,7 +198,7 @@ patches:
aliases: ["nas.${DNS_MAIN}"]
time:
disabled: false
servers: ["${IP_ROUTER_LAN}"]
servers: ["${IP_ROUTER_VLAN_K8S}"]
bootTimeout: 2m0s
# kernel:
# modules:
Expand Down Expand Up @@ -246,13 +255,6 @@ patches:
controlPlane:
patches:

- &apiServerExtraArgs |-
- op: add
path: /cluster/apiServer/extraArgs
value:
feature-gates: GracefulNodeShutdown=true,ServerSideApply=true

- &apiServerResources |-
cluster:
apiServer:
Expand Down Expand Up @@ -289,36 +291,25 @@ controlPlane:
extraArgs:
listen-metrics-urls: "http://0.0.0.0:2381"

# - &scheduler |-
# cluster:
# scheduler:
# extraArgs:
# config: "/custom/etc/kube-scheduler/config.yaml"
# extraVolumes:
# - hostPath: "/var/etc/kube-scheduler"
# mountPath: "/custom/etc/kube-scheduler"
# readonly: true
# machine:
# files:
# - op: create
# path: "/var/etc/kube-scheduler/config.yaml"
# permissions: 0o400
# content: |
# apiVersion: kubescheduler.config.k8s.io/v1
# kind: KubeSchedulerConfiguration
# profiles:
# - schedulerName: default-scheduler
# pluginConfig:
# - name: PodTopologySpread
# args:
# defaultingType: List
# defaultConstraints:
# - maxSkew: 1
# topologyKey: "kubernetes.io/hostname"
# whenUnsatisfiable: ScheduleAnyway
# - maxSkew: 5
# topologyKey: "topology.kubernetes.io/zone"
# whenUnsatisfiable: ScheduleAnyway
- &scheduler |-
cluster:
scheduler:
config:
apiVersion: kubescheduler.config.k8s.io/v1
kind: KubeSchedulerConfiguration
profiles:
- schedulerName: default-scheduler
pluginConfig:
- name: PodTopologySpread
args:
defaultingType: List
defaultConstraints:
- maxSkew: 1
topologyKey: "kubernetes.io/hostname"
whenUnsatisfiable: DoNotSchedule
- maxSkew: 3
topologyKey: "topology.kubernetes.io/zone"
whenUnsatisfiable: ScheduleAnyway

# TODO: can't create files outside /var, maybe open FR to whitelist this?
# - &nfsMountOptions |-
Expand Down
9 changes: 9 additions & 0 deletions kube/deploy/apps/home-assistant/app/hr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ spec:
labels:
ingress.home.arpa/nginx-internal: "allow"
egress.home.arpa/iot: "allow"
annotations:
k8s.v1.cni.cncf.io/networks: |
[{
"name":"iot",
"namespace": "home-assistant",
"ips": ["${APP_IP_HOME_ASSISTANT_IOT}"],
"mac": "${APP_MAC_HOME_ASSISTANT_IOT}",
"gateway": "${IP_ROUTER_VLAN_IOT}"
}]
containers:
main:
image:
Expand Down
32 changes: 32 additions & 0 deletions kube/deploy/apps/home-assistant/app/multus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
# Multus NetworkAttachmentDefinition "iot" in the home-assistant namespace.
# The embedded CNI config (cniVersion 0.3.1) chains two plugins:
#   1. macvlan in bridge mode on master interface br0.227, using static IPAM
#      with the "ips" capability enabled and one route to ${IP_VLAN_IOT_CIDR}
#      via ${IP_ROUTER_VLAN_IOT};
#   2. tuning, with the "mac" capability enabled.
# NOTE(review): br0.227 presumably is the VLAN 227 sub-interface of br0 defined
# in the Talos config; confirm the interface name on the nodes.
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: iot
namespace: home-assistant
spec:
config: |-
{
"cniVersion": "0.3.1",
"name": "iot",
"plugins": [
{
"type": "macvlan",
"master": "br0.227",
"mode": "bridge",
"capabilities": {
"ips": true
},
"ipam": {
"type": "static",
"routes": [
{ "dst": "${IP_VLAN_IOT_CIDR}", "gw": "${IP_ROUTER_VLAN_IOT}" }
]
}
},
{
"capabilities": { "mac": true },
"type": "tuning"
}
]
}
1 change: 1 addition & 0 deletions kube/deploy/apps/home-assistant/ks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ spec:
targetNamespace: "home-assistant"
dependsOn:
- name: home-assistant-pvc
- name: 1-core-1-networking-multus-app
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ cluster:

## NOTE: Cilium's routing modes for inter-nodes pod traffic
routingMode: native
devices: 'enp1s0' # use specific VLAN # TODO: figure out how to regex to match all interfaces with VLAN 58
devices: 'enp1s0,br0' # use specific VLAN # TODO: figure out how to regex to match all interfaces with VLAN 58
autoDirectNodeRoutes: true
ipv4NativeRoutingCIDR: "${IP_POD_CIDR_V4}"
loadBalancer:
Expand All @@ -58,6 +58,10 @@ k8sServiceHost: "127.0.0.1"
k8sServicePort: "7445"
kubeProxyReplacementHealthzBindAddr: "0.0.0.0:10256"

## Multus compatibility
cni:
exclusive: false

## NOTE: Cilium can automatically kill and respawn pods upon ConfigMap updates or other resource changes
rollOutCiliumPods: true
operator:
Expand Down
4 changes: 2 additions & 2 deletions kube/deploy/core/_networking/multus/app/hr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ spec:
values:
image:
repository: ghcr.io/k8snetworkplumbingwg/multus-cni
tag: v4.0.2-thick
tag: v4.0.2-thick@sha256:3fbcc32bd4e4d15bd93c96def784a229cd84cca27942bf4858b581f31c97ee02
cni:
image:
repository: ghcr.io/angelnu/cni-plugins
tag: 1.4.0@sha256:cd2b132f3725c303a809f253aac07d11e696910a6053e7535e609118151aa9fe
tag: 1.5.1@sha256:caac0fccffb23811386919e3f8cc216de7e1a907648f19bc671d9e495fb77069
paths:
config: /etc/cni/net.d
bin: /opt/cni/bin
Expand Down
Loading

0 comments on commit 28875fc

Please sign in to comment.