From dac53da1d5255a6329c26e86bdcb7ffb834a0541 Mon Sep 17 00:00:00 2001 From: Brian Davis Date: Mon, 29 Jul 2024 18:19:39 -0400 Subject: [PATCH] fix: move to scratch docker image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This updates several components in order to switch to scratch images. The move to scratch reduces additional OS dependencies; ldflags were also added to strip the golang debug symbols. This resulted in a reduction from 96.6MB to 54.9MB for the release image. This also removes all other files except for the operator itself and the ca-certificates. This is the new image structure: ``` ├── etc drwxr-xr-x 0:0 214 kB │ └── ssl drwxr-xr-x 0:0 214 kB │ └── certs -rw-r--r-- 0:0 214 kB │ └── ca-certificates.crt -rwxr-xr-x 0:0 55 MB └── postgres-operator ``` Switched off of the golang-alpine image because of issues with alpine images as well as the alpine golang image being experimental. https://hub.docker.com/_/golang declares "This variant is highly experimental, and not officially supported by the Go project". There are several issues with alpine due to musl as well; - [Why I Will Never Use Alpine Linux Ever Again](https://martinheinz.dev/blog/92) - [Docker/Alpine - Why you should avoid alpine linux](https://dev.to/kakisoft/dockeralpine-why-you-should-avoid-alpine-linux-44he) are some examples. golang:$ver is the supported version. This required some additional changes to fix the e2e tests, mainly replacing the kubectl exec of curl and wget with port-forward and python requests. This method does not require curl or wget to be included in the operator image. 
- removed docker/build_operator.sh which was no longer used after https://github.com/zalando/postgres-operator/pull/2665/files - since both the image and the k8s manifests reference the user id of 1000:1000, there is no need to pre-create the user or to even have a passwd file in the image. If the user were set to `pgo:pgo`, the passwd file would be required to map the name to an id. - updated kubectl version to match the kind version used for testing - updated e2e tests to automatically build the image if it doesn't exist during pipeline tests Ran the e2e tests numerous times to confirm all the changes worked. --- Makefile | 2 +- docker/DebugDockerfile | 6 +++-- docker/Dockerfile | 22 +++++++-------- docker/build_operator.sh | 30 --------------------- e2e/Dockerfile | 4 +-- e2e/requirements.txt | 4 ++- e2e/run.sh | 15 +++++++++-- e2e/tests/k8s_api.py | 58 ++++++++++++++++++++++++++-------------- e2e/tests/test_e2e.py | 14 +++++----- 9 files changed, 78 insertions(+), 77 deletions(-) delete mode 100644 docker/build_operator.sh diff --git a/Makefile b/Makefile index 56c63cd75..f66755a2a 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ mocks: GO111MODULE=on go generate ./... 
tools: - GO111MODULE=on go get -d k8s.io/client-go@kubernetes-1.28.10 + GO111MODULE=on go get -d k8s.io/client-go@kubernetes-1.29.2 GO111MODULE=on go install github.com/golang/mock/mockgen@v1.6.0 GO111MODULE=on go mod tidy diff --git a/docker/DebugDockerfile b/docker/DebugDockerfile index ec1ff6d2f..220979e5b 100644 --- a/docker/DebugDockerfile +++ b/docker/DebugDockerfile @@ -1,8 +1,10 @@ -FROM golang:1.22-alpine +FROM golang:1.22 LABEL maintainer="Team ACID @ Zalando " +ENV DEBIAN_FRONTEND=noninteractive + # We need root certificates to deal with teams api over https -RUN apk -U add --no-cache ca-certificates delve +RUN apt update && apt install -y ca-certificates delve COPY build/* / diff --git a/docker/Dockerfile b/docker/Dockerfile index b0808c3bc..8c375b572 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,26 +1,26 @@ -ARG BASE_IMAGE=registry.opensource.zalan.do/library/alpine-3:latest -FROM golang:1.22-alpine AS builder +FROM golang:1.22 AS builder ARG VERSION=latest +ENV DEBIAN_FRONTEND=noninteractive + +# We need root certificates to deal with teams api over https +RUN apt update && apt install -y ca-certificates + COPY . 
/go/src/github.com/zalando/postgres-operator WORKDIR /go/src/github.com/zalando/postgres-operator RUN GO111MODULE=on go mod vendor \ - && CGO_ENABLED=0 go build -o build/postgres-operator -v -ldflags "-X=main.version=${VERSION}" cmd/main.go + && CGO_ENABLED=0 go build -o build/postgres-operator -v -ldflags "-s -w -X=main.version=${VERSION}" cmd/main.go -FROM ${BASE_IMAGE} +FROM scratch LABEL maintainer="Team ACID @ Zalando " LABEL org.opencontainers.image.source="https://github.com/zalando/postgres-operator" -# We need root certificates to deal with teams api over https -RUN apk -U upgrade --no-cache \ - && apk add --no-cache curl ca-certificates - COPY --from=builder /go/src/github.com/zalando/postgres-operator/build/* / +COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt -RUN addgroup -g 1000 pgo -RUN adduser -D -u 1000 -G pgo -g 'Postgres Operator' pgo - +# since the ID instead of the name, no passwd file is needed +# if using name (like pgo), then there must be an entry to map to an ID. 
USER 1000:1000 ENTRYPOINT ["/postgres-operator"] diff --git a/docker/build_operator.sh b/docker/build_operator.sh deleted file mode 100644 index 2ada63a81..000000000 --- a/docker/build_operator.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -export DEBIAN_FRONTEND=noninteractive - -arch=$(dpkg --print-architecture) - -set -ex - -# Install dependencies - -apt-get update -apt-get install -y wget - -( - cd /tmp - wget -q "https://storage.googleapis.com/golang/go1.22.5.linux-${arch}.tar.gz" -O go.tar.gz - tar -xf go.tar.gz - mv go /usr/local - ln -s /usr/local/go/bin/go /usr/bin/go - go version -) - -# Build - -export PATH="$PATH:$HOME/go/bin" -export GOPATH="$HOME/go" -mkdir -p build - -GO111MODULE=on go mod vendor -CGO_ENABLED=0 go build -o build/postgres-operator -v -ldflags "$OPERATOR_LDFLAGS" cmd/main.go diff --git a/e2e/Dockerfile b/e2e/Dockerfile index cfbc9eff7..6515cb8f0 100644 --- a/e2e/Dockerfile +++ b/e2e/Dockerfile @@ -1,6 +1,6 @@ # An image to run e2e tests. # The image does not include the tests; all necessary files are bind-mounted when a container starts. 
-FROM ubuntu:20.04 +FROM ubuntu:22.04 LABEL maintainer="Team ACID @ Zalando " ENV TERM xterm-256color @@ -15,7 +15,7 @@ RUN apt-get update \ curl \ vim \ && pip3 install --no-cache-dir -r requirements.txt \ - && curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.24.3/bin/linux/amd64/kubectl \ + && curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.29.2/bin/linux/amd64/kubectl \ && chmod +x ./kubectl \ && mv ./kubectl /usr/local/bin/kubectl \ && apt-get clean \ diff --git a/e2e/requirements.txt b/e2e/requirements.txt index d904585be..d91c3b53b 100644 --- a/e2e/requirements.txt +++ b/e2e/requirements.txt @@ -1,3 +1,5 @@ -kubernetes==29.2.0 +kubernetes==30.1.0 timeout_decorator==0.5.0 pyyaml==6.0.1 +kr8s==0.17.0 +requests==2.32.3 \ No newline at end of file diff --git a/e2e/run.sh b/e2e/run.sh index 1adca479d..efe5ee835 100755 --- a/e2e/run.sh +++ b/e2e/run.sh @@ -9,7 +9,7 @@ IFS=$'\n\t' readonly cluster_name="postgres-operator-e2e-tests" readonly kubeconfig_path="/tmp/kind-config-${cluster_name}" readonly spilo_image="registry.opensource.zalan.do/acid/spilo-16-e2e:0.1" -readonly e2e_test_runner_image="registry.opensource.zalan.do/acid/postgres-operator-e2e-tests-runner:0.4" +readonly e2e_test_runner_image="registry.opensource.zalan.do/acid/postgres-operator-e2e-tests-runner:0.5" export GOPATH=${GOPATH-~/go} export PATH=${GOPATH}/bin:$PATH @@ -17,6 +17,13 @@ export PATH=${GOPATH}/bin:$PATH echo "Clustername: ${cluster_name}" echo "Kubeconfig path: ${kubeconfig_path}" +# build the e2e image if it doesn't exist +function pull_or_build_e2e(){ + if ! 
docker manifest inspect "${e2e_test_runner_image}" >/dev/null 2>&1; then + IMAGE=${e2e_test_runner_image%:*} TAG=${e2e_test_runner_image#*:} make docker + fi +} + function pull_images(){ operator_tag=$(git describe --tags --always --dirty) if [[ -z $(docker images -q registry.opensource.zalan.do/acid/postgres-operator:${operator_tag}) ]] @@ -47,12 +54,15 @@ function load_operator_image() { } function set_kind_api_server_ip(){ + local tmpfile=$(mktemp) echo "Setting up kind API server ip" # use the actual kubeconfig to connect to the 'kind' API server # but update the IP address of the API server to the one from the Docker 'bridge' network readonly local kind_api_server_port=6443 # well-known in the 'kind' codebase readonly local kind_api_server=$(docker inspect --format "{{ .NetworkSettings.Networks.kind.IPAddress }}:${kind_api_server_port}" "${cluster_name}"-control-plane) - sed -i "s/server.*$/server: https:\/\/$kind_api_server/g" "${kubeconfig_path}" + # support older sed versions (darwin) + sed "s/server.*$/server: https:\/\/$kind_api_server/g" "${kubeconfig_path}" > "${tmpfile}" + mv "${tmpfile}" "${kubeconfig_path}" } function generate_certificate(){ @@ -84,6 +94,7 @@ function main(){ echo "Entering main function..." [[ -z ${NOCLEANUP-} ]] && trap "cleanup" QUIT TERM EXIT pull_images + pull_or_build_e2e [[ ! 
-f ${kubeconfig_path} ]] && start_kind load_operator_image set_kind_api_server_ip diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py index 276ddfa25..3eca105ca 100644 --- a/e2e/tests/k8s_api.py +++ b/e2e/tests/k8s_api.py @@ -2,6 +2,9 @@ import time import subprocess import warnings +import socket +import requests +from kr8s.objects import Pod from kubernetes import client, config from kubernetes.client.rest import ApiException @@ -10,7 +13,6 @@ def to_selector(labels): return ",".join(["=".join(lbl) for lbl in labels.items()]) - class K8sApi: def __init__(self): @@ -285,15 +287,25 @@ def create_with_kubectl(self, path): def exec_with_kubectl(self, pod, cmd): return subprocess.run(["./exec.sh", pod, cmd], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + def portforward_req(self, pod, port, path): + pod = Pod.get(pod) + with pod.portforward(remote_port=port) as local_port: + # Make an API request + try: + resp = requests.get(f"http://localhost:{local_port}{path}") + if resp.status_code == 200: + return resp.json() + else: + return {"fail"} + except: + # ignore connection closed from requests + pass def patroni_rest(self, pod, path): - r = self.exec_with_kubectl(pod, "curl localhost:8008/" + path) - if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{": - return None - - return json.loads(r.stdout.decode()) + return self.portforward_req(pod, "8008", path) def get_patroni_state(self, pod): r = self.exec_with_kubectl(pod, "patronictl list -f json") @@ -307,11 +319,7 @@ def get_operator_state(self): return None pod = pod.metadata.name - r = self.exec_with_kubectl(pod, "curl localhost:8080/workers/all/status/") - if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{": - return None - - return json.loads(r.stdout.decode()) + return self.portforward_req(pod, "8080", "/workers/all/status/") def get_patroni_running_members(self, pod="acid-minimal-cluster-0"): result = self.get_patroni_state(pod) 
@@ -566,15 +574,25 @@ def create_with_kubectl(self, path): def exec_with_kubectl(self, pod, cmd): return subprocess.run(["./exec.sh", pod, cmd], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) - def patroni_rest(self, pod, path): - r = self.exec_with_kubectl(pod, "curl localhost:8008/" + path) - if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{": - return None + def portforward_req(self, pod, port, path): + pod = Pod.get(pod) + with pod.portforward(remote_port=port) as local_port: + # Make an API request + try: + resp = requests.get(f"http://localhost:{local_port}{path}") + if resp.status_code == 200: + return resp.json() + else: + return {"fail"} + except: + # ignore connection closed from requests + pass - return json.loads(r.stdout.decode()) + def patroni_rest(self, pod, path): + return self.portforward_req(pod, "8008", path) def get_patroni_state(self, pod): r = self.exec_with_kubectl(pod, "patronictl list -f json") diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 5182851b4..efd6c729c 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -423,7 +423,7 @@ def test_config_update(self): self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") def compare_config(): - effective_config = k8s.patroni_rest(leader.metadata.name, "config") + effective_config = k8s.patroni_rest(leader.metadata.name, "/config") desired_config = pg_patch_config["spec"]["patroni"] desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] effective_parameters = effective_config["postgresql"]["parameters"] @@ -979,16 +979,14 @@ def test_infrastructure_roles(self): def verify_role(): try: operator_pod = k8s.get_operator_pod() - get_config_cmd = "wget --quiet -O - localhost:8080/config" - result = k8s.exec_with_kubectl(operator_pod.metadata.name, - get_config_cmd) + config_path = "/config" + result = 
k8s.portforward_req(operator_pod.metadata.name, + "8080", config_path) try: - roles_dict = (json.loads(result.stdout) - .get("controller", {}) - .get("InfrastructureRoles")) + roles_dict = (result.get("controller", {}) + .get("InfrastructureRoles")) except: return False - if "robot_zmon_acid_monitoring_new" in roles_dict: role = roles_dict["robot_zmon_acid_monitoring_new"] role.pop("Password", None)