From aed3df8730a9c387f0732cb433e583425d2cb85e Mon Sep 17 00:00:00 2001 From: Bohdan Siryk Date: Wed, 4 Oct 2023 15:44:14 +0300 Subject: [PATCH] nodereload was refactored --- Makefile | 7 ++ .../v1beta1/nodereload_types.go | 26 ++--- .../v1beta1/zz_generated.deepcopy.go | 39 ++++++- ...resources.instaclustr.com_nodereloads.yaml | 27 +++++ .../clusterresources_v1beta1_nodereload.yaml | 6 +- .../samples/clusters_v1beta1_postgresql.yaml | 2 +- .../clusterresources/nodereload_controller.go | 110 ++++++++++++------ main.go | 2 +- pkg/instaclustr/client.go | 4 + 9 files changed, 168 insertions(+), 55 deletions(-) diff --git a/Makefile b/Makefile index b12ed5db3..0d3e1fd47 100644 --- a/Makefile +++ b/Makefile @@ -188,3 +188,10 @@ cert-deploy: ## Deploy cert-manager .PHONY: cert-undeploy cert-undeploy: ## UnDeploy cert-manager kubectl delete -f https://github.com/cert-manager/cert-manager/releases/download/v1.10.0/cert-manager.yaml + +.PHONY dev-build: +dev-build: docker-build kind-load deploy ## builds docker-image, loads it to kind cluster and deploys operator + +.PHONY: kind-load +kind-load: ## loads given image to kind cluster + kind load docker-image ${IMG} diff --git a/apis/clusterresources/v1beta1/nodereload_types.go b/apis/clusterresources/v1beta1/nodereload_types.go index fcbb47bcf..c45a47d6e 100644 --- a/apis/clusterresources/v1beta1/nodereload_types.go +++ b/apis/clusterresources/v1beta1/nodereload_types.go @@ -30,8 +30,11 @@ type NodeReloadSpec struct { // NodeReloadStatus defines the observed state of NodeReload type NodeReloadStatus struct { - NodeInProgress Node `json:"nodeInProgress,omitempty"` + NodeInProgress *Node `json:"nodeInProgress,omitempty"` CurrentOperationStatus *Operation `json:"currentOperationStatus,omitempty"` + PendingNodes []*Node `json:"pendingNodes,omitempty"` + CompletedNodes []*Node `json:"completedNodes,omitempty"` + FailedNodes []*Node `json:"failedNodes,omitempty"` } type Node struct { @@ -76,19 +79,12 @@ func init() { SchemeBuilder.Register(&NodeReload{}, &NodeReloadList{}) } -func (nr *NodeReloadStatus) FromInstAPI(status *models.NodeReloadStatus) *NodeReloadStatus { - var nrStatus = &NodeReloadStatus{ - NodeInProgress: Node{ - ID: status.NodeID, - }, - CurrentOperationStatus: &Operation{ - OperationID: status.OperationID, - TimeCreated: status.TimeCreated, - TimeModified: status.TimeModified, - Status: status.Status, - Message: status.Message, - }, +func (nr *NodeReloadStatus) FromInstAPI(status *models.NodeReloadStatus) { + nr.CurrentOperationStatus = &Operation{ + OperationID: status.OperationID, + TimeCreated: status.TimeCreated, + TimeModified: status.TimeModified, + Status: status.Status, + Message: status.Message, } - - return nrStatus } diff --git a/apis/clusterresources/v1beta1/zz_generated.deepcopy.go b/apis/clusterresources/v1beta1/zz_generated.deepcopy.go index db442acec..c9c23a015 100644 --- a/apis/clusterresources/v1beta1/zz_generated.deepcopy.go +++ b/apis/clusterresources/v1beta1/zz_generated.deepcopy.go @@ -1281,12 +1281,49 @@ func (in *NodeReloadSpec) DeepCopy() *NodeReloadSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeReloadStatus) DeepCopyInto(out *NodeReloadStatus) { *out = *in - out.NodeInProgress = in.NodeInProgress + if in.NodeInProgress != nil { + in, out := &in.NodeInProgress, &out.NodeInProgress + *out = new(Node) + **out = **in + } if in.CurrentOperationStatus != nil { in, out := &in.CurrentOperationStatus, &out.CurrentOperationStatus *out = new(Operation) **out = **in } + if in.PendingNodes != nil { + in, out := &in.PendingNodes, &out.PendingNodes + *out = make([]*Node, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(Node) + **out = **in + } + } + } + if in.CompletedNodes != nil { + in, out := &in.CompletedNodes, &out.CompletedNodes + *out = make([]*Node, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(Node) + **out = **in + } + } + } + if in.FailedNodes != nil { + in, out := &in.FailedNodes, &out.FailedNodes + *out = make([]*Node, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(Node) + **out = **in + } + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeReloadStatus. diff --git a/config/crd/bases/clusterresources.instaclustr.com_nodereloads.yaml b/config/crd/bases/clusterresources.instaclustr.com_nodereloads.yaml index 49e44db04..141679c0a 100644 --- a/config/crd/bases/clusterresources.instaclustr.com_nodereloads.yaml +++ b/config/crd/bases/clusterresources.instaclustr.com_nodereloads.yaml @@ -50,6 +50,15 @@ spec: status: description: NodeReloadStatus defines the observed state of NodeReload properties: + completedNodes: + items: + properties: + nodeID: + type: string + required: + - nodeID + type: object + type: array currentOperationStatus: properties: message: @@ -68,6 +77,15 @@ spec: - timeCreated - timeModified type: object + failedNodes: + items: + properties: + nodeID: + type: string + required: + - nodeID + type: object + type: array nodeInProgress: properties: nodeID: @@ -75,6 +93,15 @@ spec: required: - nodeID type: object + pendingNodes: + items: + properties: + nodeID: + type: string + required: + - nodeID + type: object + type: array type: object type: object served: true diff --git a/config/samples/clusterresources_v1beta1_nodereload.yaml b/config/samples/clusterresources_v1beta1_nodereload.yaml index 82bdc1607..a26449392 100644 --- a/config/samples/clusterresources_v1beta1_nodereload.yaml +++ b/config/samples/clusterresources_v1beta1_nodereload.yaml @@ -1,8 +1,8 @@ apiVersion: clusterresources.instaclustr.com/v1beta1 kind: NodeReload metadata: - name: nodereload-sample + name: nodereload-sample2 spec: nodes: - - nodeID: "8b5e3cb1-d1ca-400c-92e5-3279a79133a2" - - nodeID: "8de5f5ad-d776-4c19-a322-1efffe149018" + - nodeID: 1dd538b7-4544-4824-83d2-79d1b5817a67 + - nodeID: bf72ce55-bc3b-4a7f-93a9-149d5de49ef5 diff --git a/config/samples/clusters_v1beta1_postgresql.yaml b/config/samples/clusters_v1beta1_postgresql.yaml index 11c3b5859..23cdef327 100644 --- a/config/samples/clusters_v1beta1_postgresql.yaml +++ b/config/samples/clusters_v1beta1_postgresql.yaml @@ -6,7 +6,7 @@ metadata: # annotations: # testAnnotation: test spec: - name: "username-test" + name: "bohdan-test" version: "15.4.0" dataCentres: - region: "US_WEST_2" diff --git a/controllers/clusterresources/nodereload_controller.go b/controllers/clusterresources/nodereload_controller.go index 43865122d..7e7723388 100644 --- a/controllers/clusterresources/nodereload_controller.go +++ b/controllers/clusterresources/nodereload_controller.go @@ -18,6 +18,7 @@ package clusterresources import ( "context" + "errors" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" @@ -28,6 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/instaclustr/operator/apis/clusterresources/v1beta1" "github.com/instaclustr/operator/pkg/instaclustr" @@ -42,6 +44,10 @@ type NodeReloadReconciler struct { EventRecorder record.EventRecorder } +const ( + operationStatusCompleted = "COMPLETED" +) + //+kubebuilder:rbac:groups=clusterresources.instaclustr.com,resources=nodereloads,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=clusterresources.instaclustr.com,resources=nodereloads/status,verbs=get;update;patch //+kubebuilder:rbac:groups=clusterresources.instaclustr.com,resources=nodereloads/finalizers,verbs=update @@ -66,43 +72,49 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) return models.ReconcileRequeue, err } - if len(nrs.Spec.Nodes) == 0 { - err = r.Client.Delete(ctx, nrs) + patch := nrs.NewPatch() + if len(nrs.Status.PendingNodes) == 0 && + len(nrs.Status.CompletedNodes) == 0 && + len(nrs.Status.FailedNodes) == 0 { + nrs.Status.PendingNodes = nrs.Spec.Nodes + err = r.Status().Patch(ctx, nrs, patch) if err != nil { - l.Error(err, - "Cannot delete Node Reload resource from K8s cluster", - "Node Reload spec", nrs.Spec, - ) - r.EventRecorder.Eventf( - nrs, models.Warning, models.DeletionFailed, - "Resource deletion is failed. Reason: %v", - err, + l.Error(err, "Cannot set pending nodes to the status") + r.EventRecorder.Event(nrs, models.Warning, models.PatchFailed, + "Cannot set pending nodes to the resource status", ) + return models.ReconcileRequeue, nil } + } + + if len(nrs.Status.PendingNodes) == 0 { r.EventRecorder.Eventf( - nrs, models.Normal, models.DeletionStarted, - "Resource is deleted.", + nrs, models.Normal, models.UpdatedEvent, + "Nodes were successfully reloaded", ) l.Info( - "Nodes were reloaded, resource was deleted", + "Nodes were successfully reloaded", "Node Reload spec", nrs.Spec, ) + return models.ExitReconcile, nil } - patch := nrs.NewPatch() - if nrs.Status.NodeInProgress.ID == "" { - nodeInProgress := &v1beta1.Node{ - ID: nrs.Spec.Nodes[len(nrs.Spec.Nodes)-1].ID, - } - nrs.Status.NodeInProgress.ID = nodeInProgress.ID + currentNode := nrs.Status.PendingNodes[len(nrs.Status.PendingNodes)-1] - err = r.API.CreateNodeReload(nodeInProgress) + if nrs.Status.NodeInProgress == nil { + nrs.Status.NodeInProgress = currentNode + + err = r.API.CreateNodeReload(currentNode) if err != nil { + if errors.Is(err, instaclustr.NotFound) { + return r.handleNodeNotFound(ctx, currentNode, nrs) + } + l.Error(err, "Cannot start Node Reload process", - "nodeID", nodeInProgress.ID, + "nodeID", currentNode.ID, ) r.EventRecorder.Eventf( nrs, models.Warning, models.CreationFailed, @@ -119,6 +131,7 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) ) err = r.Status().Patch(ctx, nrs, patch) + nrs.Status.PendingNodes = nrs.Status.PendingNodes[:len(nrs.Status.PendingNodes)-1] if err != nil { l.Error(err, "Cannot patch Node Reload status", @@ -135,6 +148,10 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) nodeReloadStatus, err := r.API.GetNodeReloadStatus(nrs.Status.NodeInProgress.ID) if err != nil { + if errors.Is(err, instaclustr.NotFound) { + return r.handleNodeNotFound(ctx, currentNode, nrs) + } + l.Error(err, "Cannot get Node Reload status", "nodeID", nrs.Status.NodeInProgress, @@ -147,7 +164,7 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) return models.ReconcileRequeue, nil } - nrs.Status = *nrs.Status.FromInstAPI(nodeReloadStatus) + nrs.Status.FromInstAPI(nodeReloadStatus) err = r.Status().Patch(ctx, nrs, patch) if err != nil { l.Error(err, @@ -162,7 +179,7 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) return models.ReconcileRequeue, nil } - if nrs.Status.CurrentOperationStatus.Status != "COMPLETED" { + if nrs.Status.CurrentOperationStatus.Status != operationStatusCompleted { l.Info("Node Reload operation is not completed yet, please wait a few minutes", "nodeID", nrs.Status.NodeInProgress, "status", nrs.Status, @@ -170,7 +187,20 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) return models.ReconcileRequeue, nil } - nrs.Status.NodeInProgress.ID = "" + l.Info("The node has been successfully reloaded", + "Node ID", nrs.Status.NodeInProgress.ID, + ) + r.EventRecorder.Eventf(nrs, models.Normal, models.UpdatedEvent, + "Node %s has been successfully reloaded", nrs.Status.NodeInProgress.ID, + ) + + patch = nrs.NewPatch() + + nrs.Status.NodeInProgress = nil + nrs.Status.CurrentOperationStatus = nil + nrs.Status.CompletedNodes = append(nrs.Status.CompletedNodes, currentNode) + nrs.Status.PendingNodes = nrs.Status.PendingNodes[:len(nrs.Status.PendingNodes)-1] + err = r.Status().Patch(ctx, nrs, patch) if err != nil { l.Error(err, @@ -185,21 +215,33 @@ func (r *NodeReloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) return models.ReconcileRequeue, nil } - nrs.Spec.Nodes = nrs.Spec.Nodes[:len(nrs.Spec.Nodes)-1] - err = r.Patch(ctx, nrs, patch) + return reconcile.Result{Requeue: true}, nil +} + +func (r *NodeReloadReconciler) handleNodeNotFound(ctx context.Context, node *v1beta1.Node, nrs *v1beta1.NodeReload) (reconcile.Result, error) { + l := log.FromContext(ctx) + + patch := nrs.NewPatch() + nrs.Status.FailedNodes = append(nrs.Status.FailedNodes, node) + nrs.Status.PendingNodes = nrs.Status.PendingNodes[:len(nrs.Status.PendingNodes)-1] + err := r.Status().Patch(ctx, nrs, patch) if err != nil { - l.Error(err, "Cannot patch Node Reload cluster", - "spec", nrs.Spec, - ) - r.EventRecorder.Eventf( - nrs, models.Warning, models.PatchFailed, - "Resource patch is failed. Reason: %v", - err, + l.Error(err, "Cannot patch failed node") + r.EventRecorder.Event(nrs, models.Warning, models.PatchFailed, + "Cannot patch failed node", ) + return models.ReconcileRequeue, nil } - return models.ExitReconcile, nil + l.Error(err, "Node is not found on Instaclustr", + "Node ID", node.ID, + ) + r.EventRecorder.Eventf(nrs, models.Warning, models.FetchFailed, + "Node %s is not found on Instaclustr", node.ID, + ) + + return models.ReconcileRequeue, nil } // SetupWithManager sets up the controller with the Manager. diff --git a/main.go b/main.go index ac0ecfd6c..967a4a7bb 100644 --- a/main.go +++ b/main.go @@ -68,7 +68,7 @@ func main() { flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") - flag.DurationVar(&scheduler.ClusterStatusInterval, "cluster-status-interval", 60*time.Second, + flag.DurationVar(&scheduler.ClusterStatusInterval, "cluster-status-interval", 5*time.Second, "An interval to check cluster status") flag.DurationVar(&scheduler.ClusterBackupsInterval, "cluster-backups-interval", 60*time.Second, "An interval to check cluster backups") diff --git a/pkg/instaclustr/client.go b/pkg/instaclustr/client.go index bfa9ac122..82f8fe97b 100644 --- a/pkg/instaclustr/client.go +++ b/pkg/instaclustr/client.go @@ -1476,6 +1476,10 @@ func (c *Client) CreateNodeReload(nr *clusterresourcesv1beta1.Node) error { return err } + if resp.StatusCode == http.StatusNotFound { + return NotFound + } + if resp.StatusCode != http.StatusAccepted { return fmt.Errorf("status code: %d, message: %s", resp.StatusCode, body) }