From 8c8f4b0de57e398157412b78c09fe8ef78ab8903 Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Wed, 4 Sep 2024 09:13:23 -0600 Subject: [PATCH 1/7] Updating CHANGELOG --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index def4e2d4..0e5fa86b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,38 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +# [Unity Release 24.3] - 2024-09-22 + +## Tags + +- SPS Version 2.2.0 +- OGC API Version 1.1.0 +- OGC Python Client Version 1.1.0 + +## Repositories + +- unity-sps: +- unity-sps-ogc-processes-api: +- unity-sps-ogc-processes-api-client-python: + +## Epics + +- EPIC: `Security` + - [[Bug]: Upgrade EKS 1.27 AMIs](https://github.com/unity-sds/unity-sps/issues/159) +- EPIC: `Scaling` + - [[New Feature]: Increase ephemeral disk space for Airflow workers](https://github.com/unity-sds/unity-sps/issues/152) + - [[New Feature]: Enable users to select the EC2 type to execute a workload](https://github.com/unity-sds/unity-sps/issues/153) + - [[New Feature]: Set the DAG run status to "failed" if the main worker task failed](https://github.com/unity-sds/unity-sps/issues/189) +- EPIC: `Airflow/WPS-T Integration` + - [[New Feature]: Create test to deploy, execute and undeploy the CWL DAG](https://github.com/unity-sds/unity-sps/issues/131) +- EPIC: `Production Venue Deployments` + - [[New Feature]: Airflow HTTPD Proxy development and configuration](https://github.com/unity-sds/unity-sps/issues/125) + - [[New Feature]: Expose SPS health check endpoints](https://github.com/unity-sds/unity-sps/issues/127) +- EPIC: `SPS Infrastructure` + - [[New Feature]: Update documentation for SPS deployment](https://github.com/unity-sds/unity-sps/issues/116) + - [[New Feature]: Review the SPS GitBook documentation](https://github.com/unity-sds/unity-sps/issues/118) + - [[New Feature]: Store SPS Terraform state on S3](https://github.com/unity-sds/unity-sps/issues/132) + - [[New Feature]: Parametrize the SPS Integration Tests](https://github.com/unity-sds/unity-sps/issues/155) # [Unity Release 24.2] - 2024-07-01 From 47b3b1717823fee85bfe44349165bcbe53ab8e2e Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Wed, 4 Sep 2024 09:26:57 -0600 Subject: [PATCH 2/7] Upgrading Airflow to 2.10.0 --- airflow/dags/cwl_dag.py | 1 + airflow/docker/custom_airflow/Dockerfile | 2 +- airflow/docker/cwl/docker_cwl_entrypoint.sh | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index 3727dc0f..64b48817 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -77,6 +77,7 @@ dag = DAG( dag_id="cwl_dag", description="CWL DAG", + dag_display_name="CWL DAG", tags=["CWL"], is_paused_upon_creation=False, catchup=False, diff --git a/airflow/docker/custom_airflow/Dockerfile b/airflow/docker/custom_airflow/Dockerfile index e11dc637..ceb0199d 100644 --- a/airflow/docker/custom_airflow/Dockerfile +++ b/airflow/docker/custom_airflow/Dockerfile @@ -1,4 +1,4 @@ -FROM apache/airflow:2.9.1-python3.11 +FROM apache/airflow:2.10.0-python3.11 RUN pip install cwltool==3.1.20240112164112 RUN pip install boto3==1.34.89 diff --git a/airflow/docker/cwl/docker_cwl_entrypoint.sh b/airflow/docker/cwl/docker_cwl_entrypoint.sh index b2a4e59a..867e3d6b 100755 --- a/airflow/docker/cwl/docker_cwl_entrypoint.sh +++ b/airflow/docker/cwl/docker_cwl_entrypoint.sh @@ -53,7 +53,7 @@ done . /usr/share/cwl/venv/bin/activate pwd ls -lR -cwl-runner --tmp-outdir-prefix "$PWD"/ --no-read-only "$cwl_workflow" "$job_args" +cwl-runner --debug --tmp-outdir-prefix "$PWD"/ --no-read-only "$cwl_workflow" "$job_args" ls -lR # Optionally, save the requested output file to a location From 758f0e1a217544ed802658b9f22b54f93c3620c1 Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Wed, 4 Sep 2024 12:29:22 -0600 Subject: [PATCH 3/7] Upgrading the CWL dag --- airflow/dags/cwl_dag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index 64b48817..60d74760 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -25,7 +25,7 @@ # The Kubernetes namespace within which the Pod is run (it must already exist) POD_NAMESPACE = "sps" POD_LABEL = "cwl_task" -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.1.0" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:airflow-2.10.0" NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" From cc6b538d232c424e3d76bbb25468dbc5ca63a601 Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Thu, 5 Sep 2024 11:44:32 -0600 Subject: [PATCH 4/7] Upgrading the custom Docker image tag --- airflow/dags/cwl_dag.py | 2 +- terraform-unity/variables.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index 60d74760..49da454e 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -25,7 +25,7 @@ # The Kubernetes namespace within which the Pod is run (it must already exist) POD_NAMESPACE = "sps" POD_LABEL = "cwl_task" -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:airflow-2.10.0" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.2.0" NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index 19fae6d3..a8403c10 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -73,7 +73,7 @@ variable "airflow_docker_images" { default = { airflow = { name = "ghcr.io/unity-sds/unity-sps/sps-airflow" - tag = "2.1.2" + tag = "2.2.0" } } } From a4b99933e73a5eaf4616c9dd4553e3d4ac9e086e Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Thu, 5 Sep 2024 11:48:25 -0600 Subject: [PATCH 5/7] Setting tag to 2.2.0-beta-1 --- airflow/dags/cwl_dag.py | 2 +- terraform-unity/README.md | 2 +- terraform-unity/variables.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index 49da454e..44a2246b 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -25,7 +25,7 @@ # The Kubernetes namespace within which the Pod is run (it must already exist) POD_NAMESPACE = "sps" POD_LABEL = "cwl_task" -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.2.0" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.2.0-beta-1" NODE_POOL_DEFAULT = "airflow-kubernetes-pod-operator" NODE_POOL_HIGH_WORKLOAD = "airflow-kubernetes-pod-operator-high-workload" diff --git a/terraform-unity/README.md b/terraform-unity/README.md index bc04e9e5..69fd157e 100644 --- a/terraform-unity/README.md +++ b/terraform-unity/README.md @@ -187,7 +187,7 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [airflow\_docker\_images](#input\_airflow\_docker\_images) | Docker images for the associated Airflow services. |
object({
airflow = object({
name = string
tag = string
})
})
|
{
"airflow": {
"name": "ghcr.io/unity-sds/unity-sps/sps-airflow",
"tag": "2.1.2"
}
}
| no | +| [airflow\_docker\_images](#input\_airflow\_docker\_images) | Docker images for the associated Airflow services. |
object({
airflow = object({
name = string
tag = string
})
})
|
{
"airflow": {
"name": "ghcr.io/unity-sds/unity-sps/sps-airflow",
"tag": "2.2.0-beta-1"
}
}
| no | | [airflow\_webserver\_password](#input\_airflow\_webserver\_password) | The password for the Airflow webserver and UI. | `string` | n/a | yes | | [airflow\_webserver\_username](#input\_airflow\_webserver\_username) | The username for the Airflow webserver and UI. | `string` | `"admin"` | no | | [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "develop",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index a8403c10..f13e59d4 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -73,7 +73,7 @@ variable "airflow_docker_images" { default = { airflow = { name = "ghcr.io/unity-sds/unity-sps/sps-airflow" - tag = "2.2.0" + tag = "2.2.0-beta-1" } } } From 879357bcfbcb39f13d472b9edd5a392106496166 Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Thu, 5 Sep 2024 14:02:24 -0600 Subject: [PATCH 6/7] Changing the Docker version in additional DAGs --- airflow/dags/docker_cwl_pod.yaml | 2 +- airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py | 2 +- airflow/dags/sbg_preprocess_cwl_dag.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airflow/dags/docker_cwl_pod.yaml b/airflow/dags/docker_cwl_pod.yaml index ead0a727..1a9f21ac 100644 --- a/airflow/dags/docker_cwl_pod.yaml +++ b/airflow/dags/docker_cwl_pod.yaml @@ -21,7 +21,7 @@ spec: containers: - name: cwl-docker - image: ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.1.0 + image: ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.2.0-beta-1 imagePullPolicy: Always command: ["/usr/share/cwl/docker_cwl_entrypoint.sh"] securityContext: diff --git a/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py b/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py index 77e1fe91..2cba0400 100644 --- a/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py +++ b/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py @@ -23,7 +23,7 @@ # The Kubernetes namespace within which the Pod is run (it must already exist) POD_NAMESPACE = "sps" POD_LABEL = "sbg_task" -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.1.0" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.2.0-beta-1" # The path of the working directory where the CWL workflow is executed # (aka the starting directory for cwl-runner). diff --git a/airflow/dags/sbg_preprocess_cwl_dag.py b/airflow/dags/sbg_preprocess_cwl_dag.py index 46ebaa70..2d100925 100644 --- a/airflow/dags/sbg_preprocess_cwl_dag.py +++ b/airflow/dags/sbg_preprocess_cwl_dag.py @@ -17,7 +17,7 @@ # The Kubernetes namespace within which the Pod is run (it must already exist) POD_NAMESPACE = "sps" POD_LABEL = "sbg_preprocess_task" -SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.1.0" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.2.0-beta-1" # The path of the working directory where the CWL workflow is executed # (aka the starting directory for cwl-runner). From 14f9962df457182ecf6a7d2ab8ef9ccdf94bbccc Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Tue, 10 Sep 2024 13:49:39 -0600 Subject: [PATCH 7/7] Increasing the default volume size for the high workload node class. --- airflow/dags/cwl_dag.py | 4 ++-- terraform-unity/README.md | 2 +- terraform-unity/variables.tf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index 737b0d27..6f660f4e 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -99,7 +99,7 @@ "request_memory": Param( "4Gi", type="string", - enum=["8Gi", "16Gi", "32Gi", "64Gi", "128Gi", "256Gi"], + enum=["4Gi", "8Gi", "16Gi", "32Gi", "64Gi", "128Gi", "256Gi"], title="Docker container memory", ), "request_cpu": Param( @@ -111,7 +111,7 @@ "request_storage": Param( "10Gi", type="string", - enum=["10Gi", "50Gi", "100Gi", "200Gi", "300Gi"], + enum=["10Gi", "50Gi", "100Gi", "150Gi", "200Gi", "250Gi"], title="Docker container storage", ), "use_ecr": Param(False, type="boolean", title="Log into AWS Elastic Container Registry (ECR)"), diff --git a/terraform-unity/README.md b/terraform-unity/README.md index 69fd157e..73e0557f 100644 --- a/terraform-unity/README.md +++ b/terraform-unity/README.md @@ -192,7 +192,7 @@ terraform apply -no-color 2>&1 | tee apply_output.txt | [airflow\_webserver\_username](#input\_airflow\_webserver\_username) | The username for the Airflow webserver and UI. | `string` | `"admin"` | no | | [dag\_catalog\_repo](#input\_dag\_catalog\_repo) | Git repository that stores the catalog of Airflow DAGs. |
object({
url = string
ref = string
dags_directory_path = string
})
|
{
"dags_directory_path": "airflow/dags",
"ref": "develop",
"url": "https://github.com/unity-sds/unity-sps.git"
}
| no | | [helm\_charts](#input\_helm\_charts) | Helm charts for the associated services. |
map(object({
repository = string
chart = string
version = string
}))
|
{
"airflow": {
"chart": "airflow",
"repository": "https://airflow.apache.org",
"version": "1.13.1"
},
"keda": {
"chart": "keda",
"repository": "https://kedacore.github.io/charts",
"version": "v2.14.2"
}
}
| no | -| [karpenter\_node\_classes](#input\_karpenter\_node\_classes) | Configuration for karpenter\_node\_classes |
map(object({
volume_size = string
}))
|
{
"airflow-kubernetes-pod-operator-high-workload": {
"volume_size": "200Gi"
},
"default": {
"volume_size": "30Gi"
}
}
| no | +| [karpenter\_node\_classes](#input\_karpenter\_node\_classes) | Configuration for karpenter\_node\_classes |
map(object({
volume_size = string
}))
|
{
"airflow-kubernetes-pod-operator-high-workload": {
"volume_size": "300Gi"
},
"default": {
"volume_size": "30Gi"
}
}
| no | | [karpenter\_node\_pools](#input\_karpenter\_node\_pools) | Configuration for Karpenter node pools |
map(object({
requirements : list(object({
key : string
operator : string
values : list(string)
}))
nodeClassRef : string
limits : object({
cpu : string
memory : string
})
disruption : object({
consolidationPolicy : string
consolidateAfter : string
})
}))
|
{
"airflow-celery-workers": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "80",
"memory": "320Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"9"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-core-components": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "40",
"memory": "160Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "100",
"memory": "400Gi"
},
"nodeClassRef": "default",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"17"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"32769"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
},
"airflow-kubernetes-pod-operator-high-workload": {
"disruption": {
"consolidateAfter": "1m",
"consolidationPolicy": "WhenEmpty"
},
"limits": {
"cpu": "528",
"memory": "1056Gi"
},
"nodeClassRef": "airflow-kubernetes-pod-operator-high-workload",
"requirements": [
{
"key": "karpenter.k8s.aws/instance-family",
"operator": "In",
"values": [
"m7i",
"m6i",
"m5",
"t3",
"c7i",
"c6i",
"c5",
"r7i",
"r6i",
"r5"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Gt",
"values": [
"1"
]
},
{
"key": "karpenter.k8s.aws/instance-cpu",
"operator": "Lt",
"values": [
"49"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Gt",
"values": [
"8191"
]
},
{
"key": "karpenter.k8s.aws/instance-memory",
"operator": "Lt",
"values": [
"98305"
]
},
{
"key": "karpenter.k8s.aws/instance-hypervisor",
"operator": "In",
"values": [
"nitro"
]
}
]
}
}
| no | | [kubeconfig\_filepath](#input\_kubeconfig\_filepath) | The path to the kubeconfig file for the Kubernetes cluster. | `string` | n/a | yes | | [mcp\_ami\_owner\_id](#input\_mcp\_ami\_owner\_id) | The owner ID of the MCP AMIs | `string` | `"794625662971"` | no | diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index f13e59d4..48e2bc59 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -127,7 +127,7 @@ variable "karpenter_node_classes" { volume_size = "30Gi" } "airflow-kubernetes-pod-operator-high-workload" = { - volume_size = "200Gi" + volume_size = "300Gi" } } }