Skip to content

Commit

Permalink
Hotfixes for OSM (#116)
Browse files Browse the repository at this point in the history
* Remove redundant flag for cluster-name

Signed-off-by: Waleed Malik <[email protected]>

* Fix docker publish target for releases

* Fix leader election for worker manager

* Use dedicated worker client to avoid running into unsynced cache
  • Loading branch information
ahmedwaleedmalik authored Dec 23, 2021
1 parent 3e68f07 commit 8b71f99
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 46 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Github
/.github/

# Dependency directories
/vendor
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ build: $(CMD)
.PHONY: $(CMD)
$(CMD): %: $(BUILD_DEST)/%

$(BUILD_DEST)/%: cmd/%
$(BUILD_DEST)/%: cmd/%
go build -o $@ ./cmd/$*

.PHONY: run
Expand All @@ -106,7 +106,14 @@ download-gocache:
docker-image:
docker build --build-arg GO_VERSION=$(GO_VERSION) -t $(IMAGE_NAME) .


.PHONY: docker-image-publish
docker-image-publish: docker-image
docker push $(IMAGE_NAME)
if [[ -n "$(GIT_TAG)" ]]; then \
$(eval IMAGE_TAG = $(GIT_TAG)) \
docker build -t $(IMAGE_NAME) . && \
docker push $(IMAGE_NAME) && \
$(eval IMAGE_TAG = latest) \
docker build -t $(IMAGE_NAME) . ;\
docker push $(IMAGE_NAME) ;\
fi
55 changes: 32 additions & 23 deletions cmd/osm-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,25 @@ import (
"k8s.io/client-go/util/homedir"
"k8s.io/klog"
ctrl "sigs.k8s.io/controller-runtime"
ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/manager"
)

type options struct {
workerCount int
namespace string
clusterName string
containerRuntime string
externalCloudProvider bool
pauseImage string
initialTaints string
nodeHTTPProxy string
nodeNoProxy string
nodePortRange string
podCidr string

workerCount int
namespace string
clusterName string
containerRuntime string
externalCloudProvider bool
pauseImage string
initialTaints string
nodeHTTPProxy string
nodeNoProxy string
nodePortRange string
podCidr string
enableLeaderElection bool
clusterDNSIPs string
workerClusterKubeconfig string
kubeconfig string
Expand All @@ -69,10 +70,6 @@ type options struct {
workerMetricsAddress string
}

const (
defaultLeaderElectionNamespace = "kube-system"
)

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme.Scheme))
utilruntime.Must(osmv1alpha1.AddToScheme(scheme.Scheme))
Expand All @@ -89,7 +86,6 @@ func main() {
}
flag.StringVar(&opt.workerClusterKubeconfig, "worker-cluster-kubeconfig", "", "Path to kubeconfig of cluster where provisioning secrets are created")
flag.IntVar(&opt.workerCount, "worker-count", 10, "Number of workers which process reconciliation in parallel.")
flag.StringVar(&opt.clusterName, "cluster-name", "", "The cluster where the OSC will run.")
flag.StringVar(&opt.namespace, "namespace", "", "The namespace where the OSC controller will run.")
flag.StringVar(&opt.containerRuntime, "container-runtime", "containerd", "container runtime to deploy.")
flag.BoolVar(&opt.externalCloudProvider, "external-cloud-provider", false, "cloud-provider Kubelet flag set to external.")
Expand All @@ -106,6 +102,8 @@ func main() {

flag.StringVar(&opt.workerHealthProbeAddress, "worker-health-probe-address", "127.0.0.1:8086", "For worker manager, the address on which the liveness check on /healthz and readiness check on /readyz will be available")
flag.StringVar(&opt.workerMetricsAddress, "worker-metrics-address", "127.0.0.1:8081", "For worker manager, the address on which Prometheus metrics will be available under /metrics")
flag.BoolVar(&opt.enableLeaderElection, "leader-elect", true, "Enable leader election for controller manager.")

flag.Parse()

if len(opt.namespace) == 0 {
Expand Down Expand Up @@ -142,6 +140,7 @@ func main() {

// Start with assuming that current cluster will be used as worker cluster
workerMgr := mgr
workerClient := mgr.GetClient()

// Handling for worker cluster
if opt.workerClusterKubeconfig != "" {
Expand All @@ -152,10 +151,19 @@ func main() {
klog.Fatal(err)
}

// Build dedicated client for worker cluster, some read actions fail on the split client created by manager due to informers not syncing in-time
workerClient, err = ctrlruntimeclient.New(workerClusterConfig, ctrlruntimeclient.Options{
Scheme: scheme.Scheme,
})
if err != nil {
klog.Fatalf("failed to build worker client: %v", err)
}

workerMgr, err = manager.New(workerClusterConfig, manager.Options{
LeaderElection: true,
LeaderElectionID: "operating-system-manager-worker-manager",
LeaderElectionNamespace: defaultLeaderElectionNamespace,
LeaderElection: opt.enableLeaderElection,
LeaderElectionID: "operating-system-manager-worker-manager",
// We use hard-coded namespace kube-system here since manager uses worker cluster config
LeaderElectionNamespace: "kube-system",
HealthProbeBindAddress: opt.workerHealthProbeAddress,
MetricsBindAddress: opt.workerMetricsAddress,
Port: 9444,
Expand Down Expand Up @@ -186,10 +194,10 @@ func main() {
if err := osc.Add(
workerMgr,
log,
workerClient,
mgr.GetClient(),
opt.kubeconfig,
opt.namespace,
opt.clusterName,
opt.workerCount,
parsedClusterDNSIPs,
generator.NewDefaultCloudConfigGenerator(""),
Expand All @@ -214,12 +222,13 @@ func main() {
func createManager(opt *options) (manager.Manager, error) {
// Manager options
options := manager.Options{
LeaderElection: true,
LeaderElection: opt.enableLeaderElection,
LeaderElectionID: "operating-system-manager",
LeaderElectionNamespace: defaultLeaderElectionNamespace,
LeaderElectionNamespace: opt.namespace,
HealthProbeBindAddress: opt.healthProbeAddress,
MetricsBindAddress: opt.metricsAddress,
Port: 9443,
Namespace: opt.namespace,
}

mgr, err := manager.New(config.GetConfigOrDie(), options)
Expand Down
23 changes: 14 additions & 9 deletions deploy/operating-system-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
---
apiVersion: v1
data:
kubeconfig: __user_cluster_kubeconfig__
kubeconfig: __worker_cluster_kubeconfig__
kind: Secret
metadata:
name: external-cluster-kubeconfig
name: worker-cluster-kubeconfig
namespace: kube-system
type: Opaque
---
Expand Down Expand Up @@ -60,9 +60,7 @@ rules:
resources:
- operatingsystemprofiles
verbs:
- get
- list
- watch
- "*"
- apiGroups:
- operatingsystemmanager.k8c.io
resources:
Expand All @@ -81,6 +79,13 @@ rules:
- operatingsystemconfigs/status
verbs:
- get
- apiGroups:
- apps
resources:
- deployments
verbs:
- watch
- list
- apiGroups:
- ""
resources:
Expand Down Expand Up @@ -156,10 +161,10 @@ spec:
- -metrics-address=0.0.0.0:8080
- -health-probe-address=0.0.0.0:8085
- -namespace=kube-system
- -external-cluster-kubeconfig=/etc/kubernetes/kubeconfig/kubeconfig
- -worker-cluster-kubeconfig=/etc/kubernetes/kubeconfig/kubeconfig
volumeMounts:
- mountPath: /etc/kubernetes/kubeconfig
name: external-cluster-kubeconfig
name: worker-cluster-kubeconfig
readOnly: true
ports:
- containerPort: 8085
Expand All @@ -182,7 +187,7 @@ spec:
memory: 256Mi
cpu: 100m
volumes:
- name: external-cluster-kubeconfig
- name: worker-cluster-kubeconfig
secret:
defaultMode: 420
secretName: external-cluster-kubeconfig
secretName: worker-cluster-kubeconfig
6 changes: 2 additions & 4 deletions pkg/controllers/osc/osc_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ type Reconciler struct {
log *zap.SugaredLogger

namespace string
clusterAddress string
containerRuntime string
externalCloudProvider bool
pauseImage string
Expand All @@ -74,10 +73,10 @@ type Reconciler struct {
func Add(
mgr manager.Manager,
log *zap.SugaredLogger,
workerClient client.Client,
client client.Client,
workerClusterKubeconfig string,
namespace string,
clusterName string,
workerCount int,
clusterDNSIPs []net.IP,
generator generator.CloudConfigGenerator,
Expand All @@ -91,11 +90,10 @@ func Add(
nodePortRange string) error {
reconciler := &Reconciler{
log: log,
workerClient: mgr.GetClient(),
workerClient: workerClient,
Client: client,
workerClusterKubeconfig: workerClusterKubeconfig,
namespace: namespace,
clusterAddress: clusterName,
generator: generator,
clusterDNSIPs: clusterDNSIPs,
containerRuntime: containerRuntime,
Expand Down
8 changes: 0 additions & 8 deletions pkg/controllers/osc/osc_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ func init() {

type testConfig struct {
namespace string
clusterAddress string
containerRuntime string
kubeVersion string
clusterDNSIPs []net.IP
Expand Down Expand Up @@ -100,7 +99,6 @@ func TestReconciler_Reconcile(t *testing.T) {
secretFile: "secret-ubuntu-20.04-aws-containerd.yaml",
config: testConfig{
namespace: "cloud-init-settings",
clusterAddress: "http://127.0.0.1/configs",
containerRuntime: "containerd",
kubeVersion: "1.22.1",
clusterDNSIPs: []net.IP{net.IPv4(10, 0, 0, 0)},
Expand All @@ -119,7 +117,6 @@ func TestReconciler_Reconcile(t *testing.T) {
secretFile: "secret-ubuntu-20.04-aws-docker.yaml",
config: testConfig{
namespace: "cloud-init-settings",
clusterAddress: "http://127.0.0.1/configs",
containerRuntime: "docker",
kubeVersion: "1.22.1",
clusterDNSIPs: []net.IP{net.IPv4(10, 0, 0, 0)},
Expand All @@ -138,7 +135,6 @@ func TestReconciler_Reconcile(t *testing.T) {
secretFile: "secret-flatcar-aws-containerd.yaml",
config: testConfig{
namespace: "cloud-init-settings",
clusterAddress: "http://127.0.0.1/configs",
containerRuntime: "containerd",
kubeVersion: "1.22.1",
clusterDNSIPs: []net.IP{net.IPv4(10, 0, 0, 0)},
Expand All @@ -157,7 +153,6 @@ func TestReconciler_Reconcile(t *testing.T) {
secretFile: "secret-flatcar-aws-docker.yaml",
config: testConfig{
namespace: "cloud-init-settings",
clusterAddress: "http://127.0.0.1/configs",
containerRuntime: "docker",
kubeVersion: "1.22.1",
clusterDNSIPs: []net.IP{net.IPv4(10, 0, 0, 0)},
Expand All @@ -176,7 +171,6 @@ func TestReconciler_Reconcile(t *testing.T) {
secretFile: "secret-rhel-8.x-containerd.yaml",
config: testConfig{
namespace: "cloud-init-settings",
clusterAddress: "http://127.0.0.1/configs",
containerRuntime: "containerd",
kubeVersion: "1.22.1",
clusterDNSIPs: []net.IP{net.IPv4(10, 0, 0, 0)},
Expand Down Expand Up @@ -277,7 +271,6 @@ func TestMachineDeploymentDeletion(t *testing.T) {
secretFile: "secret-ubuntu-20.04-aws-containerd.yaml",
config: testConfig{
namespace: "cloud-init-settings",
clusterAddress: "http://127.0.0.1/configs",
containerRuntime: "containerd",
},
cloudProvider: "aws",
Expand Down Expand Up @@ -430,7 +423,6 @@ func buildReconciler(fakeClient client.Client, config testConfig) Reconciler {
log: testUtil.DefaultLogger,
generator: generator.NewDefaultCloudConfigGenerator(""),
namespace: config.namespace,
clusterAddress: config.clusterAddress,
workerClusterKubeconfig: kubeconfigPath,
containerRuntime: config.containerRuntime,
clusterDNSIPs: config.clusterDNSIPs,
Expand Down

0 comments on commit 8b71f99

Please sign in to comment.