diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 025a8fa..ea34c83 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -11,6 +11,7 @@ on:
       - psql-cli/**/*
       - llama.cpp-base/**/*
       - llama.cpp/**/*
+      - whisper.cpp/**/*
   workflow_dispatch: # enables manual runs

 jobs:
@@ -129,3 +130,33 @@ jobs:
           cd $IMAGE
           docker build --pull -t "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}" .
           docker push "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}"
+
+  build_whisper:
+    name: Build whisper.cpp Image
+    runs-on: ubuntu-latest
+    needs: validate
+
+    services:
+      docker:
+        image: docker:20.10.16
+        options: --privileged
+
+    env:
+      IMAGE: whisper.cpp
+      CI_REGISTRY_PATH: index.docker.io/ahoylabs
+      CI_REGISTRY_USER: ${{ secrets.CI_REGISTRY_USER }}
+      CI_REGISTRY_PASSWORD: ${{ secrets.CI_REGISTRY_PASSWORD }}
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Log in to Docker Hub
+        run: docker login -u "${{ secrets.CI_REGISTRY_USER }}" -p "${{ secrets.CI_REGISTRY_PASSWORD }}"
+
+      - name: Build whisper.cpp Docker image
+        run: |
+          cd $IMAGE
+          docker build --pull -t "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}" .
+          docker push "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}"
+
\ No newline at end of file
diff --git a/whisper.cpp/Dockerfile b/whisper.cpp/Dockerfile
new file mode 100644
index 0000000..bd5a3ee
--- /dev/null
+++ b/whisper.cpp/Dockerfile
@@ -0,0 +1,37 @@
+ARG UBUNTU_VERSION=22.04
+# This generally needs to match the container host's environment.
+# We use a slightly older version for greater compatibility.
+ARG CUDA_VERSION=12.3.2
+# CUDA build image
+ARG CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# CUDA runtime image
+ARG CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+# CUDA base image (excludes cublas)
+ARG CUDA_BASE_CONTAINER=nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}
+
+FROM ghcr.io/ggerganov/whisper.cpp:main-cuda AS whisper
+
+FROM ${CUDA_BASE_CONTAINER} AS runtime
+
+COPY --from=whisper /app/server /app/models/download-ggml-model.sh /app/
+
+RUN /bin/echo -e '#!/bin/bash\nDEBIAN_FRONTEND=noninteractive\napt-get update && apt-get install -y $@ --no-install-recommends && apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/apt/lists/*' \
+    > /usr/local/sbin/apt_install_clean.sh && \
+    chmod a+x /usr/local/sbin/apt_install_clean.sh
+RUN /bin/echo -e '#!/bin/bash\nDEBIAN_FRONTEND=noninteractive\napt-get update && apt-get remove -y $@ && apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/apt/lists/*' \
+    > /usr/local/sbin/apt_remove_clean.sh && \
+    chmod a+x /usr/local/sbin/apt_remove_clean.sh
+
+# we need just CUDA and cuBLAS;
+# this saves ~1GB vs the -runtime image
+RUN /usr/local/sbin/apt_install_clean.sh libcublas-12-3 curl wget ffmpeg
+
+ENV MODEL="small-q5_1"
+ENV PORT="8200"
+ENV ADDITIONAL_ARGS="--host 0.0.0.0 --convert --inference-path /whisper"
+
+# mount a volume here
+ENV WORKSPACE="/workspace"
+
+COPY entry.sh /
+ENTRYPOINT [ "/entry.sh" ]
diff --git a/whisper.cpp/entry.sh b/whisper.cpp/entry.sh
new file mode 100755
index 0000000..5ab0f1b
--- /dev/null
+++ b/whisper.cpp/entry.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+# we expect the following env variables:
+# MODEL="small-q5_1"
+# PORT="8080"
+# ADDITIONAL_ARGS="--metrics --host 0.0.0.0 --convert --inference-path /whisper"
+
+# we also create and populate $WORKSPACE/public (used for health checks)
+
+# fall back to "/" if $WORKSPACE is not an existing directory
+if [ ! -d "$WORKSPACE" ]; then
+    WORKSPACE="/"
+fi
+
+cd "$WORKSPACE"
+mkdir -p models
+
+if [ ! -f "models/ggml-${MODEL}.bin" ]; then
+    /app/download-ggml-model.sh "$MODEL" "$WORKSPACE/models"
+fi
+MODEL_ARG="-m $WORKSPACE/models/ggml-${MODEL}.bin"
+
+# this is the path we use for health checks
+mkdir -p public/v1
+echo "alive" > public/v1/models
+PUBLIC_ARG="--public $WORKSPACE/public"
+
+/app/server $MODEL_ARG --port $PORT $ADDITIONAL_ARGS $PUBLIC_ARG
\ No newline at end of file
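Usage sketch, assuming the image is published as index.docker.io/ahoylabs/whisper.cpp with the Dockerfile defaults above (MODEL=small-q5_1, PORT=8200, WORKSPACE=/workspace); the host directory and sample file names are illustrative, and the request shape follows the upstream whisper.cpp server example:

    # run with GPU access and a persistent workspace for the downloaded model
    docker run --rm --gpus all \
        -p 8200:8200 \
        -v "$PWD/whisper-workspace:/workspace" \
        index.docker.io/ahoylabs/whisper.cpp

    # health check: the static file written by entry.sh
    curl http://localhost:8200/v1/models

    # transcription via the endpoint configured by --inference-path /whisper
    curl -F file=@sample.wav http://localhost:8200/whisper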