-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a41766f
commit daa5725
Showing
3 changed files
with
96 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Runtime image for the whisper.cpp HTTP server with CUDA support.
# The server binary and the model download script are copied from the upstream
# whisper.cpp CUDA image into a slim CUDA "base" image; only cuBLAS and a few
# helper tools are installed on top.

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
# We use a slightly older version for greater compatibility.
ARG CUDA_VERSION=12.3.2
# CUDA build image.
# NOTE: not referenced by any stage in this file; kept so that existing
# `--build-arg CUDA_DEV_CONTAINER=...` invocations keep working.
ARG CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# CUDA runtime image.
# NOTE: not referenced by any stage in this file; kept for the same reason.
ARG CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
# CUDA base image (excludes cublas)
ARG CUDA_BASE_CONTAINER=nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}

# NOTE(review): `main-cuda` is a moving tag, so builds are not reproducible.
# Consider pinning to a release tag or an image digest.
FROM ghcr.io/ggerganov/whisper.cpp:main-cuda AS whisper

FROM ${CUDA_BASE_CONTAINER} AS runtime

COPY --from=whisper /app/server /app/models/download-ggml-model.sh /app/

# Create two small helper scripts that install / remove apt packages and clean
# up the apt caches in the same layer:
#   apt_install_clean.sh <pkg>...
#   apt_remove_clean.sh  <pkg>...
# DEBIAN_FRONTEND is exported inside the scripts so that apt-get actually sees
# it (a plain assignment would stay local to the shell), and it is not baked
# into the runtime environment of the image.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'export DEBIAN_FRONTEND=noninteractive' \
        'apt-get update' \
        'apt-get install -y --no-install-recommends "$@"' \
        'apt-get clean' \
        'apt-get autoremove --yes' \
        'rm -rf /var/lib/apt/lists/*' \
        > /usr/local/sbin/apt_install_clean.sh && \
    printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'export DEBIAN_FRONTEND=noninteractive' \
        'apt-get update' \
        'apt-get remove -y "$@"' \
        'apt-get clean' \
        'apt-get autoremove --yes' \
        'rm -rf /var/lib/apt/lists/*' \
        > /usr/local/sbin/apt_remove_clean.sh && \
    chmod a+x /usr/local/sbin/apt_install_clean.sh /usr/local/sbin/apt_remove_clean.sh

# we need just CUDA and CUBLAS
# this saves ~1GB vs the -runtime image
# NOTE: the libcublas package suffix (12-3) has to follow CUDA_VERSION's
# major-minor; update it when CUDA_VERSION is changed.
RUN /usr/local/sbin/apt_install_clean.sh libcublas-12-3 curl wget ffmpeg

# Defaults read by /entry.sh; override them at `docker run` time with `-e`.
ENV MODEL="small-q5_1" \
    PORT="8200" \
    ADDITIONAL_ARGS="--host 0.0.0.0 --convert --inference-path /whisper"

# mount volume here
ENV WORKSPACE="/workspace"

COPY entry.sh /
# Do not rely on the executable bit surviving in the build context.
RUN chmod 0755 /entry.sh
ENTRYPOINT [ "/entry.sh" ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/sh
#
# Container entrypoint: makes sure the requested ggml model is available under
# $WORKSPACE/models, creates a static file used for health checks, then starts
# the whisper.cpp server.
#
# Expected environment variables (defaults are set in the Dockerfile):
#   MODEL            model name, e.g. "small-q5_1"
#   PORT             port the server listens on, e.g. "8200"
#   ADDITIONAL_ARGS  extra server arguments, e.g.
#                    "--host 0.0.0.0 --convert --inference-path /whisper"
#   WORKSPACE        directory holding models/ and public/ (mount a volume here)

# Stop on the first failing command instead of starting the server in a
# half-prepared state (e.g. after a failed model download).
set -e

# Fall back to "/" if $WORKSPACE is unset or is not an existing directory.
if [ ! -d "$WORKSPACE" ]; then
    WORKSPACE="/"
fi

cd "$WORKSPACE"
mkdir -p models

# Download the model only when it is not already present. The target
# directory must be the same one the server loads the model from below.
MODEL_FILE="$WORKSPACE/models/ggml-${MODEL}.bin"
if [ ! -f "$MODEL_FILE" ]; then
    /app/download-ggml-model.sh "$MODEL" "$WORKSPACE/models"
fi

# This is the path we use for health checks: a static file served from the
# server's public directory.
mkdir -p public/v1
echo "alive" > public/v1/models

# exec replaces this shell with the server, so the server receives signals
# (e.g. SIGTERM from `docker stop`) directly.
# $ADDITIONAL_ARGS is intentionally left unquoted so that it is split into
# separate arguments.
# shellcheck disable=SC2086
exec /app/server \
    -m "$MODEL_FILE" \
    --port "$PORT" \
    $ADDITIONAL_ARGS \
    --public "$WORKSPACE/public"