From 82282a00abdaed21e8381052a874d8ab9a4f7e0a Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Tue, 16 Apr 2024 19:45:13 +0400 Subject: [PATCH] Break `docker-compose.yml` into smaller `compose.yml` files (#4114) --- .github/workflows/ci_cd.yml | 4 +- api/compose.yml | 60 +++++ catalog/compose.yml | 106 ++++++++ docker-compose.yml | 243 +----------------- frontend/compose.yml | 21 ++ frontend/docker-compose.playwright.yml | 1 - ingestion_server/compose.yml | 39 +++ ingestion_server/test/env.integration | 17 -- .../test/gen_integration_compose.py | 124 +++------ 9 files changed, 270 insertions(+), 345 deletions(-) create mode 100644 api/compose.yml create mode 100644 catalog/compose.yml create mode 100644 frontend/compose.yml create mode 100644 ingestion_server/compose.yml delete mode 100644 ingestion_server/test/env.integration diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 9557c93e579..743c244a23b 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -366,7 +366,9 @@ jobs: setup_images: upstream_db ingestion_server - name: Run ingestion-server tests - run: just ingestion_server/test-local + run: | + just env + just ingestion_server/test-local - name: Print ingestion-server test logs run: | diff --git a/api/compose.yml b/api/compose.yml new file mode 100644 index 00000000000..62835af4cb3 --- /dev/null +++ b/api/compose.yml @@ -0,0 +1,60 @@ +services: + cache: + profiles: + - api + image: docker.io/redis:7.2.4 + ports: + - "50263:6379" + + web: + profiles: + - api + build: + target: api + args: # Automatically inferred from env vars, unless specified + - SEMANTIC_VERSION=${SEMANTIC_VERSION:-v1.0.0} + - API_PY_VERSION + image: openverse-api + volumes: + - .:/api:z + ports: + - "50280:50280" + depends_on: + - db + - es + - cache + env_file: + - env.docker + - .env + stdin_open: true + tty: true + + nginx: + profiles: + - api + build: + target: nginx + args: # Automatically inferred from env vars, unless specified + - SEMANTIC_VERSION=${SEMANTIC_VERSION:-v1.0.0} + - API_PY_VERSION + ports: + - "50270:8080" + environment: + DJANGO_NGINX_UPSTREAM_URL: web:50280 + depends_on: + - web + + proxy: + profiles: + - api + image: docker.io/nginx:alpine + ports: + - "50200:9080" + - "50243:9443" + environment: + HTTPS_PORT: 50243 # See `ports` mapping above. + depends_on: + - web + volumes: + - ../docker/nginx/templates:/etc/nginx/templates:z + - ../docker/nginx/certs:/etc/nginx/certs:z diff --git a/catalog/compose.yml b/catalog/compose.yml new file mode 100644 index 00000000000..4dc9c2b3ae1 --- /dev/null +++ b/catalog/compose.yml @@ -0,0 +1,106 @@ +# Common build configuration for Airflow +# Extension field, see https://docs.docker.com/compose/compose-file/compose-file-v3/#extension-fields +x-airflow-common: &airflow-common + profiles: + - catalog + restart: on-failure + depends_on: + - postgres + - s3 + image: openverse_catalog + env_file: + - .env + build: + target: cat + args: # Automatically inferred from env vars, unless specified + - REQUIREMENTS_FILE=requirements-dev.txt + - CATALOG_PY_VERSION + - CATALOG_AIRFLOW_VERSION + volumes: + - .:/opt/airflow/catalog:z + - catalog-cache:/home/airflow/.cache + +services: + # Dev changes for the scheduler + scheduler: + <<: *airflow-common + depends_on: + - upstream_db + - s3 + command: scheduler + expose: + - "8793" # Used for fetching logs + environment: + # Upgrade the DB on startup + _AIRFLOW_DB_MIGRATE: "true" + _AIRFLOW_WWW_USER_CREATE: "true" + _AIRFLOW_WWW_USER_USERNAME: airflow + _AIRFLOW_WWW_USER_PASSWORD: airflow + _AIRFLOW_WWW_USER_FIRSTNAME: Air + _AIRFLOW_WWW_USER_LASTNAME: Flow + _AIRFLOW_WWW_USER_EMAIL: airflow@example.com + + # Dev changes for the triggerer + triggerer: + <<: *airflow-common + depends_on: + - upstream_db + - s3 + expose: + - "8794" # Used for logs + command: triggerer + + # Dev changes for the webserver container + webserver: + <<: *airflow-common + depends_on: + - upstream_db + - s3 + - scheduler + - triggerer + command: webserver + ports: + - "${AIRFLOW_PORT}:8080" + + s3: + profiles: + - catalog_dependencies + - catalog + image: docker.io/minio/minio:latest + ports: + - "5010:5000" + - "5011:5001" + env_file: + - .env + - ../docker/minio/.env + command: minio server /data --address :5000 --console-address :5001 + volumes: + - minio:/data + - ../docker/minio/s3_entrypoint.sh:/opt/minio/s3_entrypoint.sh:ro,z + entrypoint: /opt/minio/s3_entrypoint.sh + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5010/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + load_to_s3: + profiles: + - catalog_dependencies + - catalog + image: docker.io/minio/mc:latest + env_file: + - .env + - ../docker/minio/.env + depends_on: + - s3 + volumes: + # Buckets for testing provider data imported from s3 are subdirectories under + # /tests/s3-data/ + - ./tests/s3-data:/data:rw,z + - ../docker/minio/load_to_s3_entrypoint.sh:/opt/minio/load_to_s3_entrypoint.sh:ro,z + entrypoint: /opt/minio/load_to_s3_entrypoint.sh + +volumes: + catalog-cache: + minio: diff --git a/docker-compose.yml b/docker-compose.yml index df894f22295..6006da5d38a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,28 +1,10 @@ version: "2.4" -# Common build configuration for Airflow -# Extension field, see https://docs.docker.com/compose/compose-file/compose-file-v3/#extension-fields -x-airflow-common: &airflow-common - profiles: - - catalog - restart: on-failure - depends_on: - - postgres - - s3 - image: openverse_catalog - env_file: - - .env - - catalog/.env - build: - context: ./catalog/ - target: cat - args: # Automatically inferred from env vars, unless specified - - REQUIREMENTS_FILE=requirements-dev.txt - - CATALOG_PY_VERSION - - CATALOG_AIRFLOW_VERSION - volumes: - - ./catalog:/opt/airflow/catalog:z - - catalog-cache:/home/airflow/.cache +include: + - "catalog/compose.yml" + - "ingestion_server/compose.yml" + - "api/compose.yml" + - "frontend/compose.yml" services: # Database used by the API @@ -63,87 +45,6 @@ services: healthcheck: test: "pg_isready -U deploy -d openledger" - s3: - profiles: - - catalog_dependencies - - catalog - image: docker.io/minio/minio:latest - ports: - - "5010:5000" - - "5011:5001" - env_file: - - .env - - docker/minio/.env - - command: minio server /data --address :5000 --console-address :5001 - volumes: - - minio:/data - - ./docker/minio/s3_entrypoint.sh:/opt/minio/s3_entrypoint.sh:ro,z - entrypoint: /opt/minio/s3_entrypoint.sh - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5010/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - - load_to_s3: - profiles: - - catalog_dependencies - - catalog - image: docker.io/minio/mc:latest - env_file: - - .env - - docker/minio/.env - depends_on: - - s3 - volumes: - # Buckets for testing provider data imported from s3 are subdirectories under - # /tests/s3-data/ - - ./catalog/tests/s3-data:/data:rw,z - - ./docker/minio/load_to_s3_entrypoint.sh:/opt/minio/load_to_s3_entrypoint.sh:ro,z - entrypoint: /opt/minio/load_to_s3_entrypoint.sh - - # Dev changes for the scheduler - scheduler: - <<: *airflow-common - depends_on: - - upstream_db - - s3 - command: scheduler - expose: - - "8793" # Used for fetching logs - environment: - # Upgrade the DB on startup - _AIRFLOW_DB_MIGRATE: "true" - _AIRFLOW_WWW_USER_CREATE: "true" - _AIRFLOW_WWW_USER_USERNAME: airflow - _AIRFLOW_WWW_USER_PASSWORD: airflow - _AIRFLOW_WWW_USER_FIRSTNAME: Air - _AIRFLOW_WWW_USER_LASTNAME: Flow - _AIRFLOW_WWW_USER_EMAIL: airflow@example.com - - # Dev changes for the triggerer - triggerer: - <<: *airflow-common - depends_on: - - upstream_db - - s3 - expose: - - "8794" # Used for logs - command: triggerer - - # Dev changes for the webserver container - webserver: - <<: *airflow-common - depends_on: - - upstream_db - - s3 - - scheduler - - triggerer - command: webserver - ports: - - "${AIRFLOW_PORT}:8080" - plausible_db: profiles: - frontend @@ -183,13 +84,6 @@ services: env_file: - docker/plausible/env.docker - cache: - profiles: - - api - image: docker.io/redis:7.2.4 - ports: - - "50263:6379" - es: profiles: - ingestion_server @@ -215,136 +109,9 @@ services: volumes: - es-data:/usr/share/elasticsearch/data - web: - profiles: - - api - build: - context: ./api/ - target: api - args: # Automatically inferred from env vars, unless specified - - SEMANTIC_VERSION=${SEMANTIC_VERSION:-v1.0.0} - - API_PY_VERSION - image: openverse-api - volumes: - - ./api:/api:z - ports: - - "50280:50280" - depends_on: - - db - - es - - cache - env_file: - - api/env.docker - - api/.env - stdin_open: true - tty: true - - ingestion_server: - profiles: - - ingestion_server - - api - build: - context: ./ingestion_server/ - target: ing - args: # Automatically inferred from env vars, unless specified - - INGESTION_PY_VERSION - image: openverse-ingestion_server - ports: - - "50281:8001" - depends_on: - - db - - upstream_db - - es - - indexer_worker - volumes: - - ./ingestion_server:/ingestion_server:z - env_file: - - ingestion_server/env.docker - - ingestion_server/.env - stdin_open: true - tty: true - - indexer_worker: - profiles: - - ingestion_server - - api - build: - context: ./ingestion_server/ - target: ing - args: # Automatically inferred from env vars, unless specified - - INGESTION_PY_VERSION - image: openverse-ingestion_server - command: gunicorn indexer_worker:api --bind 0.0.0.0:8002 - expose: - - "8002" - depends_on: - - db - - upstream_db - - es - volumes: - - ./ingestion_server:/ingestion_server:z - env_file: - - ingestion_server/env.docker - stdin_open: true - tty: true - - nginx: - profiles: - - api - build: - context: ./api/ - target: nginx - args: # Automatically inferred from env vars, unless specified - - SEMANTIC_VERSION=${SEMANTIC_VERSION:-v1.0.0} - - API_PY_VERSION - ports: - - "50270:8080" - environment: - DJANGO_NGINX_UPSTREAM_URL: web:50280 - depends_on: - - web - - proxy: - profiles: - - api - image: docker.io/nginx:alpine - ports: - - "50200:9080" - - "50243:9443" - environment: - HTTPS_PORT: 50243 # See `ports` mapping above. - depends_on: - - web - volumes: - - ./docker/nginx/templates:/etc/nginx/templates:z - - ./docker/nginx/certs:/etc/nginx/certs:z - - frontend_nginx: - profiles: - - frontend - build: - context: ./frontend/ - dockerfile: Dockerfile.nginx - target: nginx - args: # Automatically inferred from env vars, unless specified - - SEMANTIC_VERSION=${SEMANTIC_VERSION:-v1.0.0} - - FRONTEND_NODE_VERSION - - FRONTEND_PNPM_VERSION - ports: - - "50290:8080" - environment: - OPENVERSE_NGINX_UPSTREAM_URL: ${HOST_NETWORK_ADDRESS:-172.17.0.1}:8443 - OPENVERSE_NGINX_PLAUSIBLE_EVENT_URL: http://plausible:8000/api/event - # Set to Docker network resolver to be able to resolve the `plausible` container. - # This would be the default but Nginx requires it to be explicitly set when - # making outgoing requests (e.g., to plausible.io) - OPENVERSE_NGINX_DNS_RESOLVER: 127.0.0.11 - volumes: api-postgres: catalog-postgres: plausible-postgres: plausible-clickhouse: es-data: - minio: - catalog-cache: diff --git a/frontend/compose.yml b/frontend/compose.yml new file mode 100644 index 00000000000..522c1c9a647 --- /dev/null +++ b/frontend/compose.yml @@ -0,0 +1,21 @@ +services: + frontend_nginx: + profiles: + - frontend + build: + context: . + dockerfile: Dockerfile.nginx + target: nginx + args: # Automatically inferred from env vars, unless specified + - SEMANTIC_VERSION=${SEMANTIC_VERSION:-v1.0.0} + - FRONTEND_NODE_VERSION + - FRONTEND_PNPM_VERSION + ports: + - "50290:8080" + environment: + OPENVERSE_NGINX_UPSTREAM_URL: ${HOST_NETWORK_ADDRESS:-172.17.0.1}:8443 + OPENVERSE_NGINX_PLAUSIBLE_EVENT_URL: http://plausible:8000/api/event + # Set to Docker network resolver to be able to resolve the `plausible` container. + # This would be the default but Nginx requires it to be explicitly set when + # making outgoing requests (e.g., to plausible.io) + OPENVERSE_NGINX_DNS_RESOLVER: 127.0.0.11 diff --git a/frontend/docker-compose.playwright.yml b/frontend/docker-compose.playwright.yml index 4f1ec46c960..93ad9383adb 100644 --- a/frontend/docker-compose.playwright.yml +++ b/frontend/docker-compose.playwright.yml @@ -19,4 +19,3 @@ services: - DEBUG=pw:webserver - UPDATE_TAPES=${UPDATE_TAPES:-false} - FASTSTART=${FASTSTART:-false} - cpus: 0.000 diff --git a/ingestion_server/compose.yml b/ingestion_server/compose.yml new file mode 100644 index 00000000000..3644bd1b5cf --- /dev/null +++ b/ingestion_server/compose.yml @@ -0,0 +1,39 @@ +# Common build configuration for the ingestion server +# Extension field, see https://docs.docker.com/compose/compose-file/compose-file-v3/#extension-fields +x-ingestion-server-common: &ingestion-server-common + profiles: + - ingestion_server + - api + build: + target: ing + args: # Automatically inferred from env vars, unless specified + - INGESTION_PY_VERSION + image: openverse-ingestion_server + env_file: + - env.docker + - .env + volumes: + - .:/ingestion_server:z + stdin_open: true + tty: true + +services: + ingestion_server: + <<: *ingestion-server-common + depends_on: + - db + - upstream_db + - es + - indexer_worker + ports: + - "50281:8001" + + indexer_worker: + <<: *ingestion-server-common + depends_on: + - db + - upstream_db + - es + command: gunicorn indexer_worker:api --bind 0.0.0.0:8002 + expose: + - "8002" diff --git a/ingestion_server/test/env.integration b/ingestion_server/test/env.integration deleted file mode 100644 index 1b245943803..00000000000 --- a/ingestion_server/test/env.integration +++ /dev/null @@ -1,17 +0,0 @@ -PYTHONUNBUFFERED="0" - -ELASTICSEARCH_URL="integration_es" - -DATABASE_HOST="integration_db" - -UPSTREAM_DB_HOST="integration_upstream_db" -UPSTREAM_DB_PORT="5432" - -RELATIVE_UPSTREAM_DB_HOST="integration_upstream_db" -RELATIVE_UPSTREAM_DB_PORT="5432" - -LOCK_PATH="/worker_state/lock" -SHELF_PATH="/worker_state/db" -SLACK_WEBHOOK="" - -INDEXER_WORKER_HOST="integration_indexer_worker" diff --git a/ingestion_server/test/gen_integration_compose.py b/ingestion_server/test/gen_integration_compose.py index 8ea9f28f7f0..60c9e8c625b 100644 --- a/ingestion_server/test/gen_integration_compose.py +++ b/ingestion_server/test/gen_integration_compose.py @@ -11,6 +11,7 @@ """ import pathlib +import subprocess import yaml @@ -19,26 +20,10 @@ this_dir = pathlib.Path(__file__).resolve().parent -# Docker Compose config will be copied from ``src_dc_path`` to ``dest_dc_path`` -src_dc_path = this_dir.parent.parent.joinpath("docker-compose.yml") +# Docker Compose config will be written to ``dest_dc_path`` dest_dc_path = this_dir.joinpath("integration-docker-compose.yml") -def _prune_services(conf: dict): - """ - Prune the unnecessary services from the Docker Compose configuration. - - After this step, only those that are used in the integration tests are left. - - :param conf: the Docker Compose configuration - """ - - services_to_keep = {"es", "ingestion_server", "indexer_worker", "db", "upstream_db"} - for service_name in dict(conf["services"]): - if service_name not in services_to_keep: - del conf["services"][service_name] - - def _map_ports(conf: dict): """ Change the port mappings for the services to avoid conflicts. @@ -49,34 +34,26 @@ def _map_ports(conf: dict): :param conf: the Docker Compose configuration """ - for service_name, service in conf["services"].items(): + for service_name in conf["services"].keys(): + service = conf["services"][service_name] if "ports" in service: - ports = service["ports"] - ports = [ - f"{service_ports[service_name]}:{port.split(':')[1]}" for port in ports - ] - service["ports"] = ports - elif "expose" in service and service_name in service_ports: - exposes = service["expose"] - ports = [f"{service_ports[service_name]}:{expose}" for expose in exposes] - service["ports"] = ports + service["ports"][0]["published"] = service_ports[service_name] def _fixup_env(conf: dict): """ - Change the relative paths to the environment files to absolute paths. - - This ensures that they are point to valid locations in the new Docker Compose file. + Map environment variables that reference other services to the new service + names that are just prefixed with 'integration_'. :param conf: the Docker Compose configuration """ - for service in {"es", "db", "upstream_db"}: - env_files = conf["services"][service]["env_file"] - env_files = [str(src_dc_path.parent.joinpath(path)) for path in env_files] - conf["services"][service]["env_file"] = env_files for service in {"ingestion_server", "indexer_worker"}: - conf["services"][service]["env_file"] = ["env.integration"] + env = conf["services"][service]["environment"] + conf["services"][service]["environment"] = { + key: f"integration_{value}" if value in conf["services"] else value + for key, value in env.items() + } def _remove_volumes(conf: dict): @@ -87,35 +64,15 @@ def _remove_volumes(conf: dict): :param conf: the Docker Compose configuration """ - volumes_to_remove = { - "db": "api-postgres", - "upstream_db": "catalog-postgres", - "es": "es-data", - } - - for service, volume_to_remove in volumes_to_remove.items(): - volumes = conf["services"][service]["volumes"] - volumes = [volume for volume in volumes if volume_to_remove not in volume] - conf["services"][service]["volumes"] = volumes + for service_name in conf["services"].keys(): + volumes = conf["services"][service_name]["volumes"] + conf["services"][service_name]["volumes"] = [ + volume for volume in volumes if volume["source"] not in conf["volumes"] + ] conf["volumes"] = {} -def _change_directories(conf: dict): - """ - Update the relative paths of the directories like build context or bind volumes. - - :param conf: the Docker Compose configuration - """ - - for service in {"ingestion_server", "indexer_worker"}: - conf["services"][service]["volumes"] = ["../:/ingestion_server"] - conf["services"][service]["build"] = "../" - - upstream_db_build = conf["services"]["upstream_db"]["build"]["context"] - conf["services"]["upstream_db"]["build"]["context"] = f"../../{upstream_db_build}" - - def _rename_services(conf: dict): """ Add the 'integration_' prefix to the services to distinguish them from dev services. @@ -137,39 +94,30 @@ def _rename_services(conf: dict): def gen_integration_compose(): print("Generating Docker Compose configuration for integration tests...") - with open(src_dc_path) as src_dc: - conf = yaml.safe_load(src_dc) - - print("│ Pruning unwanted services... ", end="") - _prune_services(conf) - print("done") - - print("│ Mapping alternative ports... ", end="") - _map_ports(conf) - print("done") + proc = subprocess.run( + args=["docker", "compose", "--profile", "ingestion_server", "config"], + capture_output=True, + cwd=this_dir.parents[1], + ) + conf = yaml.safe_load(proc.stdout) - print("│ Updating environment variables... ", end="") - _fixup_env(conf) - print("done") + print("│ Mapping alternative ports... ", end="") + _map_ports(conf) + print("done") - print("│ Removing volumes... ", end="") - _remove_volumes(conf) - print("done") + print("│ Updating environment variables... ", end="") + _fixup_env(conf) + print("done") - print("│ Changing directories... ", end="") - _change_directories(conf) - print("done") + print("│ Removing volumes... ", end="") + _remove_volumes(conf) + print("done") - print("│ Renaming services... ", end="") - _rename_services(conf) - print("done") + print("│ Renaming services... ", end="") + _rename_services(conf) + print("done") - with open(dest_dc_path, "w") as dest_dc: - dest_dc.write( - "# This is an auto-generated Docker Compose configuration file.\n" - "# Do not modify this file directly. Your changes will be overwritten.\n\n" - ) - yaml.dump(conf, dest_dc, default_flow_style=False) + dest_dc_path.write_text(yaml.safe_dump(conf, default_flow_style=False)) print("done\n") return dest_dc_path