feat(sage): create the project sage-otel-collector (ARCH-324) (#2908)
Co-authored-by: BryanFauble <[email protected]>
tschaffter and BryanFauble authored Nov 13, 2024
1 parent 48e5566 commit e42f2b9
Showing 8 changed files with 107 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -126,4 +126,4 @@ docs/_build/
target/

# Ipython Notebook
.ipynb_checkpoints
.ipynb_checkpoints
5 changes: 5 additions & 0 deletions apps/sage/otel-collector/Dockerfile
@@ -0,0 +1,5 @@
# Health check binary from the AWS OTEL collector distro (see README).
FROM public.ecr.aws/aws-observability/aws-otel-collector:v0.41.1 AS aws-otel

FROM otel/opentelemetry-collector-contrib:0.113.0

# The contrib image ships without a shell or cURL, so copy in the binary that
# ECS container health checks can execute directly.
COPY --from=aws-otel /healthcheck /healthcheck
34 changes: 34 additions & 0 deletions apps/sage/otel-collector/README.md
@@ -0,0 +1,34 @@
# Purpose

As discussed in this GitHub issue: <https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/30798#issuecomment-2009233014>,
the official OpenTelemetry (OTEL) collector image does not contain cURL or the related shell
commands required to perform container-level health checks. It relies on external
services, such as an AWS Application Load Balancer, to perform these checks. This is
problematic for our deployment of the OTEL collector because we use AWS
Service Connect with AWS ECS to allow other containers within the namespace to connect
to the collector, so there is no load balancer in front of the container to manage
its lifecycle. Within ECS, the approach AWS recommends for container-level health
checks is to let ECS run a command inside the container (sketched below).
Source: <https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_HealthCheck.html>
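
For illustration, here is a minimal sketch of such a health check on an ECS task definition,
written as CloudFormation YAML. The resource names, image tag, and timing values are
assumptions; the actual deployment lives in the CDK scripts mentioned later in this README.

```yaml
Resources:
  OtelCollectorTaskDefinition:
    Type: AWS::ECS::TaskDefinition
    Properties:
      RequiresCompatibilities: [FARGATE]
      NetworkMode: awsvpc
      Cpu: '256'
      Memory: '512'
      ContainerDefinitions:
        - Name: sage-otel-collector
          Image: ghcr.io/sage-bionetworks/sage-otel-collector:v0.1.0 # placeholder tag
          PortMappings:
            - ContainerPort: 4317 # OTLP gRPC receiver
          HealthCheck:
            # The image has no shell, so ECS executes the binary directly
            # (the /healthcheck binary copied in by this project's Dockerfile).
            Command: [CMD, /healthcheck]
            Interval: 30
            Timeout: 5
            Retries: 3
            StartPeriod: 10
```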

Since the OTEL collector image has neither a shell nor cURL available, we need to accomplish
this another way. The official AWS OTEL collector distro solves this by
compiling a Go program into a binary that can be run within the container.
Unfortunately, we cannot use the AWS OTEL collector itself because it does not support the
`oauth2clientauthextension`: <https://github.com/aws-observability/aws-otel-collector/issues/1492>.

For our purposes, we create a new image based on the `otel/opentelemetry-collector-contrib` image
with the addition of the health check binary from the AWS OTEL distro. This
combination lets us use the OAuth2 extension and still have container-level health checks.
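
To make the combination concrete, here is a minimal collector configuration sketch that
enables both the `health_check` extension (the endpoint the copied health check binary is
expected to probe) and the `oauth2client` extension. The client ID, token URL, and exporter
endpoint are placeholders, not values used by this project.

```yaml
extensions:
  health_check:
    endpoint: 0.0.0.0:13133
  oauth2client:
    client_id: example-client-id # placeholder
    client_secret: ${env:OAUTH_CLIENT_SECRET}
    token_url: https://example.org/oauth2/token # placeholder

receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317

exporters:
  otlphttp:
    endpoint: https://telemetry.example.org # placeholder
    auth:
      authenticator: oauth2client

service:
  extensions: [health_check, oauth2client]
  pipelines:
    traces:
      receivers: [otlp]
      exporters: [otlphttp]
```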

## Creating a new image (To automate later on)

As new base images are released, we will in turn need to build a new OTEL collector
image to deploy to ECS.

1. Update values in the `Dockerfile`
2. Run `docker build -t ghcr.io/sage-bionetworks/sage-otel-collector:vX.X.X .` (Replace the version)
3. Run `docker push ghcr.io/sage-bionetworks/sage-otel-collector:vX.X.X` (Replace the version)

Once the new image is built and pushed, update the values in the CDK
scripts to use the new image version.
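
As a hypothetical sketch of the automation hinted at in the section title, the
rebuild-and-push steps could be wired into a GitHub Actions workflow like the one below.
The workflow name, trigger paths, and tag are assumptions, and the repository's existing
`publish-image` Nx target may already cover part of this in CI.

```yaml
name: publish-sage-otel-collector
on:
  push:
    branches: [main]
    paths:
      - 'apps/sage/otel-collector/**'
jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4
      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - uses: docker/build-push-action@v6
        with:
          context: apps/sage/otel-collector
          push: true
          tags: ghcr.io/sage-bionetworks/sage-otel-collector:v0.1.0 # placeholder version
```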
32 changes: 32 additions & 0 deletions apps/sage/otel-collector/project.json
@@ -0,0 +1,32 @@
{
"name": "sage-otel-collector",
"$schema": "../../../node_modules/nx/schemas/project-schema.json",
"projectType": "application",
"targets": {
"serve-detach": {
"executor": "nx:run-commands",
"options": {
"command": "docker/sage/serve-detach.sh sage-otel-collector"
}
},
"publish-image": {
"executor": "@nx-tools/nx-container:build",
"options": {
"context": "apps/sage/otel-collector",
"metadata": {
"images": ["ghcr.io/sage-bionetworks/{projectName}"],
"tags": ["type=edge,branch=main", "type=sha"]
},
"push": true
},
"dependsOn": ["build-image"]
},
"scan-image": {
"executor": "nx:run-commands",
"options": {
"command": "trivy image ghcr.io/sage-bionetworks/{projectName}:local --quiet",
"color": true
}
}
}
}
4 changes: 4 additions & 0 deletions docker/sage/networks.yml
@@ -0,0 +1,4 @@
networks:
sage:
name: sage
driver: bridge
12 changes: 12 additions & 0 deletions docker/sage/serve-detach.sh
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

args=(
# List of services in alphanumeric order
--file docker/sage/services/otel-collector.yml

--file docker/sage/networks.yml

up "$1" --detach --remove-orphans
)

docker compose "${args[@]}"
18 changes: 18 additions & 0 deletions docker/sage/services/otel-collector.yml
@@ -0,0 +1,18 @@
services:
sage-otel-collector:
image: ghcr.io/sage-bionetworks/sage-otel-collector:${SAGE_VERSION:-local}
container_name: sage-otel-collector
restart: always
networks:
- sage
ports:
- '1888:1888' # pprof extension
- '8888:8888' # Prometheus metrics exposed by the collector
- '8889:8889' # Prometheus exporter metrics
- '13133:13133' # health_check extension
- '4317:4317' # OTLP gRPC receiver
- '55679:55679' # zpages extension
deploy:
resources:
limits:
memory: 200M
2 changes: 1 addition & 1 deletion libs/sage-monorepo/nx-plugin/src/plugins/plugin.ts
@@ -34,7 +34,7 @@ function writeProjectConfigurationsToCache(
writeJsonFile(cachePath, results);
}

const projectFilePattern = '{apps,libs}/{openchallenges,agora,sandbox}/**/project.json';
const projectFilePattern = '{apps,libs}/{openchallenges,agora,sage,sandbox}/**/project.json';

export const createNodesV2: CreateNodesV2<SageMonorepoPluginOptions> = [
projectFilePattern,
