diff --git a/e2e/k8s/kindConfigDefaultPorts.yaml b/e2e/k8s/kindConfigDefaultPorts.yaml index 8b6ca8f480f..04ba34dd347 100644 --- a/e2e/k8s/kindConfigDefaultPorts.yaml +++ b/e2e/k8s/kindConfigDefaultPorts.yaml @@ -3,6 +3,7 @@ name: k8s-env apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane + image: kindest/node:v1.28.12@sha256:fa0e48b1e83bb8688a5724aa7eebffbd6337abd7909ad089a2700bf08c30c6ea extraPortMappings: - containerPort: 30200 # Map internal elasticsearch service to host port hostPort: 9200 diff --git a/e2e/k8s/kindConfigDefaultPortsDev.yaml b/e2e/k8s/kindConfigDefaultPortsDev.yaml index 0e6f8115213..f56ae0474a9 100644 --- a/e2e/k8s/kindConfigDefaultPortsDev.yaml +++ b/e2e/k8s/kindConfigDefaultPortsDev.yaml @@ -7,6 +7,7 @@ name: k8s-env apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane + image: kindest/node:v1.28.12@sha256:fa0e48b1e83bb8688a5724aa7eebffbd6337abd7909ad089a2700bf08c30c6ea extraPortMappings: - containerPort: 30200 # Map internal elasticsearch service to host port hostPort: 9200 diff --git a/e2e/k8s/kindConfigTestPorts.yaml b/e2e/k8s/kindConfigTestPorts.yaml index 46b196411c6..d0f183f9801 100644 --- a/e2e/k8s/kindConfigTestPorts.yaml +++ b/e2e/k8s/kindConfigTestPorts.yaml @@ -3,6 +3,7 @@ name: k8s-e2e apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane + image: kindest/node:v1.28.12@sha256:fa0e48b1e83bb8688a5724aa7eebffbd6337abd7909ad089a2700bf08c30c6ea extraPortMappings: - containerPort: 30200 # Map internal elasticsearch service to host port hostPort: 49200 diff --git a/package.json b/package.json index 1df7930760b..9d7d378819f 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "teraslice-workspace", "displayName": "Teraslice", - "version": "2.3.1", + "version": "2.3.2", "private": true, "homepage": "https://github.com/terascope/teraslice", "bugs": { diff --git a/packages/job-components/package.json b/packages/job-components/package.json index b83e020c967..1d69500f18a 100644 --- a/packages/job-components/package.json +++ b/packages/job-components/package.json @@ -1,7 +1,7 @@ { "name": "@terascope/job-components", "displayName": "Job Components", - "version": "1.3.0", + "version": "1.3.1", "description": "A teraslice library for validating jobs schemas, registering apis, and defining and running new Job APIs", "homepage": "https://github.com/terascope/teraslice/tree/master/packages/job-components#readme", "bugs": { diff --git a/packages/scripts/package.json b/packages/scripts/package.json index 4bd3df97e78..080b869f01f 100644 --- a/packages/scripts/package.json +++ b/packages/scripts/package.json @@ -1,7 +1,7 @@ { "name": "@terascope/scripts", "displayName": "Scripts", - "version": "1.1.1", + "version": "1.1.2", "description": "A collection of terascope monorepo scripts", "homepage": "https://github.com/terascope/teraslice/tree/master/packages/scripts#readme", "bugs": { diff --git a/packages/teraslice-test-harness/package.json b/packages/teraslice-test-harness/package.json index 493e4060bce..825898af9fc 100644 --- a/packages/teraslice-test-harness/package.json +++ b/packages/teraslice-test-harness/package.json @@ -36,10 +36,10 @@ "fs-extra": "^11.2.0" }, "devDependencies": { - "@terascope/job-components": "^1.3.0" + "@terascope/job-components": "^1.3.1" }, "peerDependencies": { - "@terascope/job-components": ">=1.3.0" + "@terascope/job-components": ">=1.3.1" }, "engines": { "node": ">=18.18.0", diff --git a/packages/teraslice/package.json b/packages/teraslice/package.json index 3021d5e7b25..4a69648f45d 100644 --- a/packages/teraslice/package.json +++ b/packages/teraslice/package.json @@ -1,7 +1,7 @@ { "name": "teraslice", "displayName": "Teraslice", - "version": "2.3.1", + "version": "2.3.2", "description": "Distributed computing platform for processing JSON data", "homepage": "https://github.com/terascope/teraslice#readme", "bugs": { @@ -40,7 +40,7 @@ "dependencies": { "@kubernetes/client-node": "^0.21.0", "@terascope/elasticsearch-api": "^4.1.0", - "@terascope/job-components": "^1.3.0", + "@terascope/job-components": "^1.3.1", "@terascope/teraslice-messaging": "^1.4.0", "@terascope/types": "^1.1.0", "@terascope/utils": "^1.1.0", diff --git a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/index.ts b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/index.ts index 6e6ba2e63bb..34c92330c85 100644 --- a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/index.ts +++ b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/index.ts @@ -227,7 +227,7 @@ export class KubernetesClusterBackend { */ async listResourcesForJobId(jobId: string) { const resources = []; - const resourceTypes = ['pods', 'deployments', 'services', 'jobs']; + const resourceTypes = ['pods', 'deployments', 'services', 'jobs', 'replicasets']; for (const type of resourceTypes) { const list = await this.k8s.list(`teraslice.terascope.io/jobId=${jobId}`, type); if (list.items.length > 0) { diff --git a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/k8s.ts b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/k8s.ts index f7990b02126..48eb133998f 100644 --- a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/k8s.ts +++ b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetes/k8s.ts @@ -6,6 +6,7 @@ import KubeClient from 'kubernetes-client'; // @ts-expect-error import Request from 'kubernetes-client/backends/request/index.js'; import { getRetryConfig } from './utils.js'; +import { IncomingMessage } from 'node:http'; // @ts-expect-error const { Client, KubeConfig } = KubeClient; @@ -98,7 +99,7 @@ export class K8s { const namespace = ns || this.defaultNamespace; let now = Date.now(); const end = now + timeout; - + while (true) { const result = await pRetry(() => this.client .api.v1.namespaces(namespace).pods() @@ -136,7 +137,7 @@ export class K8s { const namespace = ns || this.defaultNamespace; let now = Date.now(); const end = now + timeout; - + while (true) { const result = await pRetry(() => this.client .api.v1.namespaces(namespace).pods() @@ -163,7 +164,7 @@ export class K8s { * returns list of k8s objects matching provided selector * @param {String} selector kubernetes selector, like 'app=teraslice' * @param {String} objType Type of k8s object to get, valid options: - * 'pods', 'deployment', 'services', 'jobs' + * 'pods', 'deployment', 'services', 'jobs', 'replicasets' * @param {String} ns namespace to search, this will override the default * @return {Object} body of k8s get response. */ @@ -188,6 +189,10 @@ export class K8s { response = await pRetry(() => this.client .apis.batch.v1.namespaces(namespace).jobs() .get({ qs: { labelSelector: selector } }), getRetryConfig()); + } else if (objType === 'replicasets') { + response = await pRetry(() => this.client + .apis.apps.v1.namespaces(namespace).replicasets() + .get({ qs: { labelSelector: selector } }), getRetryConfig()); } else { const error = new Error(`Wrong objType provided to get: ${objType}`); this.logger.error(error); @@ -298,12 +303,15 @@ export class K8s { * Deletes k8s object of specified objType * @param {String} name Name of the resource to delete * @param {String} objType Type of k8s object to get, valid options: - * 'deployments', 'services', 'jobs' + * 'deployments', 'services', 'jobs', 'pods', 'replicasets' * @param {Boolean} force Forcefully delete resource by setting gracePeriodSeconds to 1 - * to be forcefully stopped. * @return {Object} k8s delete response body. */ async delete(name: string, objType: string, force?: boolean) { + if (name === undefined || name.trim() === '') { + throw new Error(`Name of resource to delete must be specified. Received: "${name}".`); + } + let response; // To get a Job to remove the associated pods you have to @@ -321,23 +329,53 @@ export class K8s { deleteOptions.body.gracePeriodSeconds = 1; } + const deleteWithErrorHandling = async (deleteFn: () => Promise<{ + response: IncomingMessage, + body: Record + }>) => { + try { + const res = await deleteFn(); + return res; + } catch (e) { + if (e.statusCode) { + // 404 should be an acceptable response to a delete request, not an error + if (e.statusCode === 404) { + this.logger.info(`No ${objType} with name ${name} found while attempting to delete.`); + return e; + } + + if (e.statusCode >= 400) { + const err = new TSError(`Unexpected response code (${e.statusCode}), when deleting name: ${name}`); + this.logger.error(err); + err.code = e.statusCode.toString(); + return Promise.reject(err); + } + } + throw e; + } + } + try { if (objType === 'services') { - response = await pRetry(() => this.client + response = await pRetry(() => deleteWithErrorHandling(() => this.client .api.v1.namespaces(this.defaultNamespace).services(name) - .delete(), getRetryConfig()); + .delete(deleteOptions)), getRetryConfig()); } else if (objType === 'deployments') { - response = await pRetry(() => this.client + response = await pRetry(() => deleteWithErrorHandling(() => this.client .apis.apps.v1.namespaces(this.defaultNamespace).deployments(name) - .delete(), getRetryConfig()); + .delete(deleteOptions)), getRetryConfig()); } else if (objType === 'jobs') { - response = await pRetry(() => this.client + response = await pRetry(() => deleteWithErrorHandling(() => this.client .apis.batch.v1.namespaces(this.defaultNamespace).jobs(name) - .delete(deleteOptions), getRetryConfig()); + .delete(deleteOptions)), getRetryConfig()); } else if (objType === 'pods') { - response = await pRetry(() => this.client + response = await pRetry(() => deleteWithErrorHandling(() => this.client .api.v1.namespaces(this.defaultNamespace).pods(name) - .delete(deleteOptions), getRetryConfig()); + .delete(deleteOptions)), getRetryConfig()); + } else if (objType === 'replicasets') { + response = await pRetry(() => deleteWithErrorHandling(() => this.client + .apis.apps.v1.namespaces(this.defaultNamespace).replicasets(name) + .delete(deleteOptions)), getRetryConfig()); } else { throw new Error(`Invalid objType: ${objType}`); } @@ -347,20 +385,14 @@ export class K8s { return Promise.reject(err); } - if (response.statusCode >= 400) { - const err = new TSError(`Unexpected response code (${response.statusCode}), when deleting name: ${name}`); - this.logger.error(err); - err.code = response.statusCode; - return Promise.reject(err); - } - return response.body; } /** * Delete all of Kubernetes resources related to the specified exId * @param {String} exId ID of the execution - * @param {Boolean} force Forcefully stop all related pod, deployment, and job resources + * @param {Boolean} force Forcefully stop all pod, deployment, + * service, replicaset and job resources * @return {Promise} */ async deleteExecution(exId: string, force = false) { @@ -368,23 +400,32 @@ export class K8s { throw new Error('deleteExecution requires an executionId'); } + if (force) { + // Order matters. If we delete a parent resource before its children it + // will be marked for background deletion and then can't be force deleted. + await this._deleteObjByExId(exId, 'worker', 'pods', force); + await this._deleteObjByExId(exId, 'worker', 'replicasets', force); + await this._deleteObjByExId(exId, 'worker', 'deployments', force); + await this._deleteObjByExId(exId, 'execution_controller', 'pods', force); + await this._deleteObjByExId(exId, 'execution_controller', 'services', force); + } + await this._deleteObjByExId(exId, 'execution_controller', 'jobs', force); } /** - * Finds the k8s object by nodeType and exId and then deletes it + * Finds the k8s objects by nodeType and exId and then deletes them * @param {String} exId Execution ID * @param {String} nodeType valid Teraslice k8s node type: * 'worker', 'execution_controller' * @param {String} objType valid object type: `services`, `deployments`, - * 'jobs' - * @param {Boolean} force Forcefully stop all related pod, deployment, and job resources + * `jobs`, `pods`, `replicasets` + * @param {Boolean} force Forcefully stop all resources * @return {Promise} */ async _deleteObjByExId(exId: string, nodeType: string, objType: string, force?: boolean) { let objList; - let forcePodsList; - let deleteResponse; + const deleteResponses = []; try { objList = await this.list(`app.kubernetes.io/component=${nodeType},teraslice.terascope.io/exId=${exId}`, objType); @@ -394,52 +435,38 @@ export class K8s { return Promise.reject(err); } - if (force) { - try { - forcePodsList = await this.list(`teraslice.terascope.io/exId=${exId}`, 'pods'); - } catch (e) { - const err = new Error(`Request pods list in _deleteObjByExId with exId: ${exId} failed with: ${e}`); - this.logger.error(err); - return Promise.reject(err); - } - } - - if (isEmpty(objList.items) && isEmpty(forcePodsList?.items)) { + if (isEmpty(objList.items)) { this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} has already been deleted`); return Promise.resolve(); } - const deletePodResponses = []; - if (forcePodsList?.items) { - this.logger.info(`k8s._deleteObjByExId: ${exId} force deleting all pods`); - for (const pod of forcePodsList.items) { - const podName = pod.metadata.name; - - try { - deletePodResponses.push(await this.delete(podName, 'pods', force)); - } catch (e) { - const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${podName} failed with: ${e}`); - this.logger.error(err); - return Promise.reject(err); - } + for (const obj of objList.items) { + const { name, deletionTimestamp } = obj.metadata; + + if (!name) { + const err = new Error(`Cannot delete ${objType} for ExId: ${exId} by name because it has no name`); + this.logger.error(err); + return Promise.reject(err); } - } - const name = get(objList, 'items[0].metadata.name'); - this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} deleting: ${name}`); + // If deletionTimestamp is present then the resource is already terminating. + // K8s will not change the grace period in this case, so force deletion is not possible + if (force && deletionTimestamp) { + this.logger.warn(`Cannot force delete ${name} for ExId: ${exId}. It will finish deleting gracefully by ${deletionTimestamp}`); + return Promise.resolve(); + } - try { - deleteResponse = await this.delete(name, objType, force); - } catch (e) { - const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${name} failed with: ${e}`); - this.logger.error(err); - return Promise.reject(err); + this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} ${force ? 'force' : ''} deleting: ${name}`); + try { + deleteResponses.push(await this.delete(name, objType, force)); + } catch (e) { + const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${name} failed with: ${e}`); + this.logger.error(err); + return Promise.reject(err); + } } - if (deletePodResponses.length > 0) { - deleteResponse.deletePodResponses = deletePodResponses; - } - return deleteResponse; + return deleteResponses; } /** diff --git a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/index.ts b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/index.ts index 2bf74b0cbad..3705d9dea83 100644 --- a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/index.ts +++ b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/index.ts @@ -259,11 +259,12 @@ export class KubernetesClusterBackendV2 { /** * Returns a list of all k8s resources associated with a job ID * @param {string} jobId The job ID of the job to list associated resources - * @returns {Array} + * @returns {Array} */ async listResourcesForJobId(jobId: string) { const resources = []; - const resourceTypes = ['pods', 'deployments', 'services', 'jobs']; + const resourceTypes = ['pods', 'deployments', 'services', 'jobs', 'replicasets']; for (const type of resourceTypes) { const list = await this.k8s.list(`teraslice.terascope.io/jobId=${jobId}`, type); if (list.items.length > 0) { diff --git a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.ts b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.ts index 3e429462c96..a1276675ba9 100644 --- a/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.ts +++ b/packages/teraslice/src/lib/cluster/services/cluster/backends/kubernetesV2/k8s.ts @@ -15,7 +15,7 @@ interface KubeConfigOptions { type K8sObjectList = k8s.V1DeploymentList | k8s.V1ServiceList - | k8s.V1JobList | k8s.V1PodList; + | k8s.V1JobList | k8s.V1PodList | k8s.V1ReplicaSetList; export class K8s { logger: Logger; @@ -165,7 +165,7 @@ export class K8s { * returns list of k8s objects matching provided selector * @param {String} selector kubernetes selector, like 'app=teraslice' * @param {String} objType Type of k8s object to get, valid options: - * 'pods', 'deployment', 'services', 'jobs' + * 'pods', 'deployment', 'services', 'jobs', 'replicasets' * @param {String} ns namespace to search, this will override the default * @return {Object} body of k8s get response. */ @@ -173,46 +173,52 @@ export class K8s { const namespace = ns || this.defaultNamespace; let responseObj: { response: IncomingMessage, - body: k8s.V1PodList | k8s.V1DeploymentList | k8s.V1ServiceList | k8s.V1JobList + body: k8s.V1PodList | k8s.V1DeploymentList + | k8s.V1ServiceList | k8s.V1JobList | k8s.V1ReplicaSetList }; + const params: [ + string, + string | undefined, + boolean | undefined, + string | undefined, + string | undefined, + string + ] = [ + namespace, + undefined, + undefined, + undefined, + undefined, + selector + ] + try { if (objType === 'pods') { - responseObj = await pRetry(() => this.k8sCoreV1Api.listNamespacedPod( - namespace, - undefined, - undefined, - undefined, - undefined, - selector - ), getRetryConfig()); + responseObj = await pRetry( + () => this.k8sCoreV1Api.listNamespacedPod(...params), + getRetryConfig() + ); } else if (objType === 'deployments') { - responseObj = await pRetry(() => this.k8sAppsV1Api.listNamespacedDeployment( - namespace, - undefined, - undefined, - undefined, - undefined, - selector - ), getRetryConfig()); + responseObj = await pRetry( + () => this.k8sAppsV1Api.listNamespacedDeployment(...params), + getRetryConfig() + ); } else if (objType === 'services') { - responseObj = await pRetry(() => this.k8sCoreV1Api.listNamespacedService( - namespace, - undefined, - undefined, - undefined, - undefined, - selector - ), getRetryConfig()); + responseObj = await pRetry( + () => this.k8sCoreV1Api.listNamespacedService(...params), + getRetryConfig() + ); } else if (objType === 'jobs') { - responseObj = await pRetry(() => this.k8sBatchV1Api.listNamespacedJob( - namespace, - undefined, - undefined, - undefined, - undefined, - selector - ), getRetryConfig()); + responseObj = await pRetry( + () => this.k8sBatchV1Api.listNamespacedJob(...params), + getRetryConfig() + ); + } else if (objType === 'replicasets') { + responseObj = await pRetry( + () => this.k8sAppsV1Api.listNamespacedReplicaSet(...params), + getRetryConfig() + ); } else { const error = new Error(`Wrong objType provided to get: ${objType}`); this.logger.error(error); @@ -339,12 +345,16 @@ export class K8s { * Deletes k8s object of specified objType * @param {String} name Name of the resource to delete * @param {String} objType Type of k8s object to get, valid options: - * 'deployments', 'services', 'jobs' + * 'deployments', 'services', 'jobs', 'pods' * @param {Boolean} force Forcefully delete resource by setting gracePeriodSeconds to 1 * to be forcefully stopped. * @return {Object} k8s delete response body. */ async delete(name: string, objType: string, force?: boolean) { + if (name === undefined || name.trim() === '') { + throw new Error(`Name of resource to delete must be specified. Received: "${name}".`); + } + let responseObj: { response: IncomingMessage, body: k8s.V1Status | k8s.V1Pod | k8s.V1Service @@ -363,55 +373,68 @@ export class K8s { deleteOptions.gracePeriodSeconds = 1; } + const params: [ + string, + string, + string | undefined, + string | undefined, + number | undefined, + boolean | undefined, + string | undefined, + k8s.V1DeleteOptions | undefined + ] = [ + name, + this.defaultNamespace, + undefined, + undefined, + undefined, + undefined, + undefined, + deleteOptions + ] + + const deleteWithErrorHandling = async (deleteFn: () => Promise<{ + response: IncomingMessage, + body: k8s.V1Status | k8s.V1Pod | k8s.V1Service + }>) => { + try { + const res = await deleteFn(); + return res; + } catch (e) { + if (e.statusCode) { + // 404 should be an acceptable response to a delete request, not an error + if (e.statusCode === 404) { + this.logger.info(`No ${objType} with name ${name} found while attempting to delete.`); + return e; + } + + if (e.statusCode >= 400) { + const err = new TSError(`Unexpected response code (${e.statusCode}), when deleting name: ${name}`); + this.logger.error(err); + err.code = e.statusCode.toString(); + return Promise.reject(err); + } + } + throw e; + } + } + try { if (objType === 'services') { - responseObj = await pRetry(() => this.k8sCoreV1Api - .deleteNamespacedService( - name, - this.defaultNamespace, - undefined, - undefined, - undefined, - undefined, - undefined, - deleteOptions - ), getRetryConfig()); + responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sCoreV1Api + .deleteNamespacedService(...params)), getRetryConfig()); } else if (objType === 'deployments') { - responseObj = await pRetry(() => this.k8sAppsV1Api - .deleteNamespacedDeployment( - name, - this.defaultNamespace, - undefined, - undefined, - undefined, - undefined, - undefined, - deleteOptions - ), getRetryConfig()); + responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sAppsV1Api + .deleteNamespacedDeployment(...params)), getRetryConfig()); } else if (objType === 'jobs') { - responseObj = await pRetry(() => this.k8sBatchV1Api - .deleteNamespacedJob( - name, - this.defaultNamespace, - undefined, - undefined, - undefined, - undefined, - undefined, - deleteOptions - ), getRetryConfig()); + responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sBatchV1Api + .deleteNamespacedJob(...params)), getRetryConfig()); } else if (objType === 'pods') { - responseObj = await pRetry(() => this.k8sCoreV1Api - .deleteNamespacedPod( - name, - this.defaultNamespace, - undefined, - undefined, - undefined, - undefined, - undefined, - deleteOptions - ), getRetryConfig()); + responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sCoreV1Api + .deleteNamespacedPod(...params)), getRetryConfig()); + } else if (objType === 'replicasets') { + responseObj = await pRetry(() => deleteWithErrorHandling(() => this.k8sAppsV1Api + .deleteNamespacedReplicaSet(...params)), getRetryConfig()); } else { throw new Error(`Invalid objType: ${objType}`); } @@ -421,20 +444,14 @@ export class K8s { return Promise.reject(err); } - if (responseObj.response.statusCode && responseObj.response.statusCode >= 400) { - const err = new TSError(`Unexpected response code (${responseObj.response.statusCode}), when deleting name: ${name}`); - this.logger.error(err); - err.code = responseObj.response.statusCode.toString(); - return Promise.reject(err); - } - return responseObj.body; } /** * Delete all of Kubernetes resources related to the specified exId * @param {String} exId ID of the execution - * @param {Boolean} force Forcefully stop all related pod, deployment, and job resources + * @param {Boolean} force Forcefully stop all pod, deployment, + * service, replicaset and job resources * @return {Promise} */ async deleteExecution(exId: string, force = false) { @@ -442,23 +459,34 @@ export class K8s { throw new Error('deleteExecution requires an executionId'); } + if (force) { + // Order matters. If we delete a parent resource before its children it + // will be marked for background deletion and then can't be force deleted. + await this._deleteObjByExId(exId, 'worker', 'pods', force); + await this._deleteObjByExId(exId, 'worker', 'replicasets', force); + await this._deleteObjByExId(exId, 'worker', 'deployments', force); + await this._deleteObjByExId(exId, 'execution_controller', 'pods', force); + await this._deleteObjByExId(exId, 'execution_controller', 'services', force); + } + await this._deleteObjByExId(exId, 'execution_controller', 'jobs', force); } /** - * Finds the k8s object by nodeType and exId and then deletes it + * Finds the k8s objects by nodeType and exId and then deletes them * @param {String} exId Execution ID * @param {String} nodeType valid Teraslice k8s node type: * 'worker', 'execution_controller' * @param {String} objType valid object type: `services`, `deployments`, - * 'jobs' - * @param {Boolean} force Forcefully stop all related pod, deployment, and job resources + * `jobs`, `pods`, `replicasets` + * @param {Boolean} force Forcefully stop all resources * @return {Promise} */ async _deleteObjByExId(exId: string, nodeType: string, objType: string, force?: boolean) { let objList: K8sObjectList; - let forcePodsList: k8s.V1PodList | undefined; - const deleteResponses: Array = []; + const deleteResponses: Array< + k8s.V1Pod[] | k8s.V1Pod | k8s.V1Service | k8s.V1Status | k8s.V1ReplicaSet + > = []; try { objList = await this.list(`app.kubernetes.io/component=${nodeType},teraslice.terascope.io/exId=${exId}`, objType); @@ -468,64 +496,38 @@ export class K8s { return Promise.reject(err); } - if (force) { - try { - forcePodsList = await this.list(`teraslice.terascope.io/exId=${exId}`, 'pods') as k8s.V1PodList; - } catch (e) { - const err = new Error(`Request pods list in _deleteObjByExId with exId: ${exId} failed with: ${e}`); - this.logger.error(err); - return Promise.reject(err); - } - } - - if (isEmpty(objList.items) && isEmpty(forcePodsList?.items)) { + if (isEmpty(objList.items)) { this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} has already been deleted`); return Promise.resolve(); } - const deletePodResponses: k8s.V1Pod[] = []; - if (forcePodsList?.items) { - this.logger.info(`k8s._deleteObjByExId: ${exId} force deleting all pods`); - for (const pod of forcePodsList.items) { - if (!pod.metadata) { - this.logger.error('Cannot delete pod by metadata.name because it has no metadata'); - continue; - } - if (!pod.metadata.name) { - this.logger.error(`Cannot delete pod with labels ${pod.metadata.labels} by name because it has no name`); - continue; - } - const podName = pod.metadata.name; - try { - const V1Pod = await this.delete(podName, 'pods', force) as k8s.V1Pod; - deletePodResponses.push(V1Pod); - } catch (e) { - const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${podName} failed with: ${e}`); - this.logger.error(err); - return Promise.reject(err); - } + for (const obj of objList.items) { + const name = obj.metadata?.name; + const deletionTimestamp = obj.metadata?.deletionTimestamp; + + if (!name) { + const err = new Error(`Cannot delete ${objType} for ExId: ${exId} by name because it has no name`); + this.logger.error(err); + return Promise.reject(err); } - } - const name = objList.items[0].metadata?.name; - if (name === undefined) { - const err = new Error(`Cannot delete ${objType} for ExId: ${exId} by name because it has no name`); - this.logger.error(err); - return Promise.reject(err); - } - this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} deleting: ${name}`); + // If deletionTimestamp is present then the resource is already terminating. + // K8s will not change the grace period in this case, so force deletion is not possible + if (force && deletionTimestamp) { + this.logger.warn(`Cannot force delete ${name} for ExId: ${exId}. It will finish deleting gracefully by ${deletionTimestamp}`); + return Promise.resolve(); + } - try { - deleteResponses.push(await this.delete(name, objType, force)); - } catch (e) { - const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${name} failed with: ${e}`); - this.logger.error(err); - return Promise.reject(err); + this.logger.info(`k8s._deleteObjByExId: ${exId} ${nodeType} ${objType} ${force ? 'force' : ''} deleting: ${name}`); + try { + deleteResponses.push(await this.delete(name, objType, force)); + } catch (e) { + const err = new Error(`Request k8s.delete in _deleteObjByExId with name: ${name} failed with: ${e}`); + this.logger.error(err); + return Promise.reject(err); + } } - if (deletePodResponses.length > 0) { - deleteResponses.push(deletePodResponses); - } return deleteResponses; } diff --git a/packages/teraslice/src/lib/cluster/services/jobs.ts b/packages/teraslice/src/lib/cluster/services/jobs.ts index 637560fa53d..bd24ed89bcd 100644 --- a/packages/teraslice/src/lib/cluster/services/jobs.ts +++ b/packages/teraslice/src/lib/cluster/services/jobs.ts @@ -162,8 +162,7 @@ export class JobsService { const exIdsArr = Array.from(exIdsSet); const exIdsString = exIdsArr.join(', '); throw new TSError(`There are orphaned resources for job: ${jobId}, exId: ${exIdsString}. - To remove orphaned resources: - curl -XPOST /v1/jobs/${jobId}/_stop?force=true`); + Please wait for Kubernetes to clean up orphaned resources.`); } const jobSpec = await this.jobsStorage.get(jobId); diff --git a/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.ts b/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.ts index 5f1cc8df24b..d228fff7004 100644 --- a/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.ts +++ b/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/k8s-spec.ts @@ -103,6 +103,16 @@ describe('k8s', () => { const jobs = await k8s.list('app=teraslice', 'jobs'); expect(jobs.kind).toEqual('JobList'); }); + + it('can get ReplicaSetList', async () => { + nock(_url) + .get('/apis/apps/v1/namespaces/default/replicasets/') + .query({ labelSelector: 'app=teraslice' }) + .reply(200, { kind: 'ReplicaSetList' }); + + const jobs = await k8s.list('app=teraslice', 'replicasets'); + expect(jobs.kind).toEqual('ReplicaSetList'); + }); }); describe('->nonEmptyList', () => { @@ -170,10 +180,20 @@ describe('k8s', () => { }); describe('->delete', () => { + it('will throw if name is undefined', async () => { + await expect(k8s.delete(undefined as unknown as string, 'deployments')) + .rejects.toThrow('Name of resource to delete must be specified. Received: "undefined".'); + }); + + it('will throw if name is an empty string', async () => { + await expect(k8s.delete('', 'deployments')) + .rejects.toThrow('Name of resource to delete must be specified. Received: "".'); + }); + it('can delete a deployment by name', async () => { nock(_url) .delete('/apis/apps/v1/namespaces/default/deployments/test1') - .reply(200, { }); + .reply(200, {}); const response = await k8s.delete('test1', 'deployments'); expect(response).toEqual({}); @@ -182,7 +202,7 @@ describe('k8s', () => { it('can delete a service by name', async () => { nock(_url) .delete('/api/v1/namespaces/default/services/test1') - .reply(200, { }); + .reply(200, {}); const response = await k8s.delete('test1', 'services'); expect(response).toEqual({}); @@ -191,7 +211,7 @@ describe('k8s', () => { it('can delete a job by name', async () => { nock(_url) .delete('/apis/batch/v1/namespaces/default/jobs/test1') - .reply(200, { }); + .reply(200, {}); const response = await k8s.delete('test1', 'jobs'); expect(response).toEqual({}); @@ -205,46 +225,106 @@ describe('k8s', () => { const response = await k8s.delete('test1', 'pods'); expect(response).toEqual({}); }); + + it('can delete a replicaset by name', async () => { + nock(_url) + .delete('/apis/apps/v1/namespaces/default/replicasets/test1') + .reply(200, {}); + + const response = await k8s.delete('test1', 'replicasets'); + expect(response).toEqual({}); + }); + + it('will throw on a reponse code >= 400, excluding 404', async () => { + nock(_url) + .delete('/api/v1/namespaces/default/pods/bad-response') + .replyWithError({ statusCode: 400 }) + .delete('/api/v1/namespaces/default/pods/bad-response') + .replyWithError({ statusCode: 400 }) + .delete('/api/v1/namespaces/default/pods/bad-response') + .replyWithError({ statusCode: 400 }); + + await expect(k8s.delete('bad-response', 'pods')) + .rejects.toThrow('Request k8s.delete with name: bad-response failed with: TSError: Unexpected response code (400), when deleting name: bad-response'); + }); + + it('will succeed on a 404 response code', async () => { + const notFoundResponse = { + body: { + kind: 'Status', + apiVersion: 'v1', + metadata: {}, + status: 'Failure', + message: 'pods "non-existent" not found', + reason: 'NotFound', + details: { name: 'non-existent', kind: 'pods' }, + code: 404 + }, + statusCode: 404 + } + nock(_url) + .delete('/api/v1/namespaces/default/pods/non-existent') + .replyWithError(notFoundResponse); + + const response = await k8s.delete('non-existent', 'pods'); + expect(response).toEqual(notFoundResponse.body); + }); }); describe('->_deletObjByExId', () => { - it('can force delete a job', async () => { + it('will throw if name is undefined', async () => { nock(_url) .get('/apis/batch/v1/namespaces/default/jobs/') .query({ labelSelector: /app\.kubernetes\.io\/component=execution_controller,teraslice\.terascope\.io\/exId=.*/ }) .reply(200, { kind: 'JobList', items: [ - { metadata: { name: 'testJob1' } } + { metadata: { name: undefined } } ] }); + await expect(k8s._deleteObjByExId('no-name', 'execution_controller', 'jobs')) + .rejects.toThrow('Cannot delete jobs for ExId: no-name by name because it has no name'); + + }); + + it('can delete a single object', async () => { nock(_url) - .get('/api/v1/namespaces/default/pods/') - .query({ labelSelector: /teraslice\.terascope\.io\/exId=.*/ }) + .get('/apis/batch/v1/namespaces/default/jobs/') + .query({ labelSelector: /app\.kubernetes\.io\/component=execution_controller,teraslice\.terascope\.io\/exId=.*/ }) .reply(200, { - kind: 'PodList', + kind: 'JobList', items: [ - { metadata: { name: 'testEx1' } }, { metadata: { name: 'testWkr1' } } + { metadata: { name: 'testJob1' } } ] }); nock(_url) - .delete('/api/v1/namespaces/default/pods/testEx1') + .delete('/apis/batch/v1/namespaces/default/jobs/testJob1') .reply(200, {}); - nock(_url) - .delete('/api/v1/namespaces/default/pods/testWkr1') - .reply(200, {}); + const response = await k8s._deleteObjByExId('testJob1', 'execution_controller', 'jobs'); + expect(response).toEqual([{}]); + }); + it('can delete a multiple objects', async () => { nock(_url) - .delete('/apis/batch/v1/namespaces/default/jobs/testJob1') + .get('/api/v1/namespaces/default/pods/') + .query({ labelSelector: /app\.kubernetes\.io\/component=worker,teraslice\.terascope\.io\/exId=.*/ }) + .reply(200, { + kind: 'PodList', + items: [ + { metadata: { name: 'testPod1' } }, + { metadata: { name: 'testPod2' } } + ] + }) + .delete('/api/v1/namespaces/default/pods/testPod1') + .reply(200, {}) + .delete('/api/v1/namespaces/default/pods/testPod2') .reply(200, {}); - const response = await k8s._deleteObjByExId('testJob1', 'execution_controller', 'jobs', true); - expect(response).toEqual({ - deletePodResponses: [{}, {}] - }); + const response = await k8s._deleteObjByExId('testPods', 'worker', 'pods'); + expect(response).toEqual([{}, {}]); }); }); diff --git a/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.ts b/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.ts index 99e1850a8cc..f7034a48b35 100644 --- a/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.ts +++ b/packages/teraslice/test/lib/cluster/services/cluster/backends/kubernetes/v2/k8s-v2-spec.ts @@ -103,6 +103,16 @@ describe('k8s', () => { const jobs = await k8s.list('app=teraslice', 'jobs'); expect(jobs.kind).toEqual('JobList'); }); + + it('can get ReplicaSetList', async () => { + nock(_url) + .get('/apis/apps/v1/namespaces/default/replicasets') + .query({ labelSelector: 'app=teraslice' }) + .reply(200, { kind: 'ReplicaSetList' }); + + const jobs = await k8s.list('app=teraslice', 'replicasets'); + expect(jobs.kind).toEqual('ReplicaSetList'); + }); }); describe('->nonEmptyList', () => { @@ -181,10 +191,20 @@ describe('k8s', () => { }); describe('->delete', () => { + it('will throw if name is undefined', async () => { + await expect(k8s.delete(undefined as unknown as string, 'deployments')) + .rejects.toThrow('Name of resource to delete must be specified. Received: "undefined".'); + }); + + it('will throw if name is an empty string', async () => { + await expect(k8s.delete('', 'deployments')) + .rejects.toThrow('Name of resource to delete must be specified. Received: "".'); + }); + it('can delete a deployment by name', async () => { nock(_url) .delete('/apis/apps/v1/namespaces/default/deployments/test1') - .reply(200, { }); + .reply(200, {}); const response = await k8s.delete('test1', 'deployments'); expect(response).toEqual({}); @@ -193,7 +213,7 @@ describe('k8s', () => { it('can delete a service by name', async () => { nock(_url) .delete('/api/v1/namespaces/default/services/test1') - .reply(200, { }); + .reply(200, {}); const response = await k8s.delete('test1', 'services'); expect(response).toEqual({}); @@ -202,7 +222,7 @@ describe('k8s', () => { it('can delete a job by name', async () => { nock(_url) .delete('/apis/batch/v1/namespaces/default/jobs/test1') - .reply(200, { }); + .reply(200, {}); const response = await k8s.delete('test1', 'jobs'); expect(response).toEqual({}); @@ -216,6 +236,50 @@ describe('k8s', () => { const response = await k8s.delete('test1', 'pods'); expect(response).toEqual({}); }); + + it('can delete a replicaset by name', async () => { + nock(_url) + .delete('/apis/apps/v1/namespaces/default/replicasets/test1') + .reply(200, {}); + + const response = await k8s.delete('test1', 'replicasets'); + expect(response).toEqual({}); + }); + + it('will throw on a reponse code >= 400, excluding 404', async () => { + nock(_url) + .delete('/api/v1/namespaces/default/pods/bad-response') + .replyWithError({ statusCode: 400 }) + .delete('/api/v1/namespaces/default/pods/bad-response') + .replyWithError({ statusCode: 400 }) + .delete('/api/v1/namespaces/default/pods/bad-response') + .replyWithError({ statusCode: 400 }); + + await expect(k8s.delete('bad-response', 'pods')) + .rejects.toThrow('Request k8s.delete with name: bad-response failed with: TSError: Unexpected response code (400), when deleting name: bad-response'); + }); + + it('will succeed on a 404 response code', async () => { + const notFoundResponse = { + body: { + kind: 'Status', + apiVersion: 'v1', + metadata: {}, + status: 'Failure', + message: 'pods "non-existent" not found', + reason: 'NotFound', + details: { name: 'non-existent', kind: 'pods' }, + code: 404 + }, + statusCode: 404 + } + nock(_url) + .delete('/api/v1/namespaces/default/pods/non-existent') + .replyWithError(notFoundResponse); + + const response = await k8s.delete('non-existent', 'pods'); + expect(response).toEqual(notFoundResponse.body); + }); }); describe('->_deletObjByExId', () => { @@ -225,16 +289,22 @@ describe('k8s', () => { metadata: { name: 'testJob1' } }; - const exPod = { + const jobNoName = { + apiVersion: '1.0.0', + kind: 'Job', + metadata: { name: undefined } + }; + + const testPod1 = { apiVersion: 'v1', kind: 'Pod', - metadata: { name: 'testEx1' } + metadata: { name: 'testPod1' } }; - const wkrPod = { + const testPod2 = { apiVersion: 'v1', kind: 'Pod', - metadata: { name: 'testWkr1' } + metadata: { name: 'testPod2' } }; const status = { @@ -249,35 +319,55 @@ describe('k8s', () => { status: 'Success' }; - it('can force delete a job', async () => { + it('will throw if name is undefined', async () => { + nock(_url) + .get('/apis/batch/v1/namespaces/default/jobs') + .query({ labelSelector: /app\.kubernetes\.io\/component=execution_controller,teraslice\.terascope\.io\/exId=.*/ }) + .reply(200, { + kind: 'JobList', + items: [jobNoName] + }); + + await expect(k8s._deleteObjByExId('no-name', 'execution_controller', 'jobs')) + .rejects.toThrow('Cannot delete jobs for ExId: no-name by name because it has no name'); + + }); + + it('can delete a single object', async () => { nock(_url) .get('/apis/batch/v1/namespaces/default/jobs') .query({ labelSelector: /app\.kubernetes\.io\/component=execution_controller,teraslice\.terascope\.io\/exId=.*/ }) .reply(200, { kind: 'JobList', items: [job] - }) + }); + + nock(_url) + .delete('/apis/batch/v1/namespaces/default/jobs/testJob1') + .reply(200, status); + + const response = await k8s._deleteObjByExId('testJob1', 'execution_controller', 'jobs'); + expect(response).toEqual([expect.objectContaining(status)]); + }); + + it('can delete a multiple objects', async () => { + nock(_url) .get('/api/v1/namespaces/default/pods') - .query({ labelSelector: /teraslice\.terascope\.io\/exId=.*/ }) + .query({ labelSelector: /app\.kubernetes\.io\/component=worker,teraslice\.terascope\.io\/exId=.*/ }) .reply(200, { kind: 'PodList', - items: [exPod, wkrPod] + items: [ + { metadata: { name: 'testPod1' } }, + { metadata: { name: 'testPod2' } } + ] }) - .delete('/api/v1/namespaces/default/pods/testEx1') - .reply(200, exPod) - .delete('/api/v1/namespaces/default/pods/testWkr1') - .reply(200, wkrPod) - .delete('/apis/batch/v1/namespaces/default/jobs/testJob1') - .reply(200, status); + .delete('/api/v1/namespaces/default/pods/testPod1') + .reply(200, testPod1) + .delete('/api/v1/namespaces/default/pods/testPod2') + .reply(200, testPod2); - const response = await k8s._deleteObjByExId('testJob1', 'execution_controller', 'jobs', true); - expect(response).toEqual([ - expect.objectContaining(status), - [ - expect.objectContaining(exPod), - expect.objectContaining(wkrPod) - ] - ]); + const response = await k8s._deleteObjByExId('testPods', 'worker', 'pods'); + expect(response).toEqual([expect.objectContaining(testPod1), expect.objectContaining(testPod2)]); }); });