Merge pull request #111 from casperdcl/docker-cache

docker build speedups & memory reduction
premAI-io · Sep 23, 2023 · bf42764 · bf42764
2 parents 29ed998 + abf9bdf
commit bf42764
Show file tree

Hide file tree

Showing 50 changed files with 202 additions and 371 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,7 +1,7 @@
 name: CI
 on:
   push: {branches: [main]}
-  pull_request_target:
+  pull_request_target: {paths: ['*-*/**']}
   schedule: [{cron: '0 11 * * 6'}]  # M H d m w (Sat 11:00)
 jobs:
   setup:
@@ -15,7 +15,6 @@ jobs:
     - id: list
       name: List modified models
       run: |
-        echo "debug: author_association: ${{ github.event.pull_request.author_association }}"
         if test "${{ github.event_name }}" = schedule; then  # scheduled run: emit every *-* directory
           echo "modified=$(ls -d *-* | sort | jq -Rsc 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
         else

diff --git a/a2t-whisper/build.sh b/a2t-whisper/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.1
+export VERSION=1.0.2
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/audio-to-text-whisper-tiny-cpu     tiny     ${@:1}

diff --git a/a2t-whisper/docker/cpu/Dockerfile b/a2t-whisper/docker/cpu/Dockerfile
@@ -1,31 +1,17 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
 
-ENV PYTHONDONTWRITEBYTECODE 1
-ENV PYTHONUNBUFFERED 1
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    git \
-    wget \
-    libatlas-base-dev \
-    ffmpeg
-
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    build-essential git wget libatlas-base-dev ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cpu
-
-CMD python3 main.py
+CMD python main.py
diff --git a/a2t-whisper/docker/gpu/Dockerfile b/a2t-whisper/docker/gpu/Dockerfile
@@ -1,20 +1,15 @@
 FROM huggingface/transformers-pytorch-gpu:latest
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
 
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cuda
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cdr-replit/build.sh b/cdr-replit/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/coder-replit-code-v1-3b-gpu replit/replit-code-v1-3b ${@:1}
diff --git a/cdr-replit/docker/gpu/Dockerfile b/cdr-replit/docker/gpu/Dockerfile
@@ -1,21 +1,15 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
-RUN pip install "accelerate>=0.16.0,<1"
-
 WORKDIR /usr/src/app/
 
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install "accelerate>=0.16.0,<1"
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cdr-replit/models.py b/cdr-replit/models.py
@@ -26,9 +26,7 @@ def generate(
             num_return_sequences=1,
             eos_token_id=cls.tokenizer.eos_token_id,
         )
-        return cls.tokenizer.decode(
-            tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
+        return cls.tokenizer.decode(tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
 
     @classmethod
     def get_model(cls):

diff --git a/cdr-t5/build.sh b/cdr-t5/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/coder-codet5p-220m-py-cpu Salesforce/codet5p-220m-py ${@:1}
diff --git a/cdr-t5/docker/cpu/Dockerfile b/cdr-t5/docker/cpu/Dockerfile
@@ -1,20 +1,14 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
 
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cpu
-
 CMD python main.py
diff --git a/cdr-t5/models.py b/cdr-t5/models.py
@@ -15,18 +15,14 @@ def generate(
         stop: str = "",
         **kwargs,
     ):
-        inputs = cls.tokenizer.encode(prompt, return_tensors="pt").to(
-            os.getenv("DEVICE", "cpu")
-        )
+        inputs = cls.tokenizer.encode(prompt, return_tensors="pt").to(os.getenv("DEVICE", "cpu"))
         outputs = cls.model.generate(inputs, max_length=max_tokens)
         return cls.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     @classmethod
     def get_model(cls):
         if cls.model is None:
-            cls.tokenizer = AutoTokenizer.from_pretrained(
-                os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py")
-            )
+            cls.tokenizer = AutoTokenizer.from_pretrained(os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py"))
             cls.model = T5ForConditionalGeneration.from_pretrained(
                 os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py")
             ).to(os.getenv("DEVICE", "cpu"))

diff --git a/cht-dolly-v2/build.sh b/cht-dolly-v2/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.3
+export VERSION=1.0.4
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-dolly-v2-12b-gpu databricks/dolly-v2-12b ${@:1}
diff --git a/cht-dolly-v2/docker/gpu/Dockerfile b/cht-dolly-v2/docker/gpu/Dockerfile
@@ -1,22 +1,16 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
-RUN pip install "accelerate>=0.16.0,<1"
-
 WORKDIR /usr/src/app/
 
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install "accelerate>=0.16.0,<1"
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-ENV DEVICE=auto
-
-CMD python3 main.py
+#ENV DEVICE=auto
+CMD python main.py
diff --git a/cht-falcon/build.sh b/cht-falcon/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-falcon-7b-instruct-gpu tiiuae/falcon-7b-instruct ${@:1}
diff --git a/cht-falcon/docker/gpu/Dockerfile b/cht-falcon/docker/gpu/Dockerfile
@@ -1,19 +1,14 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
 
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cht-falcon/utils.py b/cht-falcon/utils.py
@@ -16,10 +16,7 @@ def __call__(self, input_ids, scores, **kwargs) -> bool:
         generated_text = self.tokenizer.decode(input_ids[0])
         generated_text = generated_text.replace(self.prompt, "")
         # Check if the target sequence appears in the generated text
-        return any(
-            target_sequence in generated_text
-            for target_sequence in self.target_sequences
-        )
+        return any(target_sequence in generated_text for target_sequence in self.target_sequences)
 
     def __len__(self) -> int:
         return len(self.target_sequences)

diff --git a/cht-gorilla/build.sh b/cht-gorilla/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-gorilla-falcon-7b-gpu gorilla-llm/gorilla-falcon-7b-hf-v0 ${@:1}

diff --git a/cht-gorilla/docker/gpu/Dockerfile b/cht-gorilla/docker/gpu/Dockerfile
@@ -1,19 +1,14 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
 
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cht-gorilla/tests/test_views.py b/cht-gorilla/tests/test_views.py
@@ -11,9 +11,7 @@ def test_chat_gorilla() -> None:
             "/v1/chat/completions",
             json={
                 "model": os.getenv("MODEL_ID", "gorilla-llm/gorilla-falcon-7b-hf-v0"),
-                "messages": [
-                    {"role": "user", "content": "Generate an image of  a cat"}
-                ],
+                "messages": [{"role": "user", "content": "Generate an image of  a cat"}],
             },
         )
         assert response.status_code == 200, response.content
diff --git a/cht-llama-cpp/build.sh b/cht-llama-cpp/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.4
+export VERSION=1.0.5
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/chat-gpt4all-lora-q4-cpu gpt4all-lora-q4 ${@:1}

diff --git a/cht-llama-cpp/docker/cpu/Dockerfile b/cht-llama-cpp/docker/cpu/Dockerfile
@@ -1,22 +1,17 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
-RUN apt update && apt install -y libopenblas-dev ninja-build build-essential wget
-RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools
-
 WORKDIR /usr/src/app/
 
-RUN wget https://prem-models.s3.eu-central-1.amazonaws.com/${MODEL_ID}.bin
-RUN mkdir -p ./ml/models/
-RUN mv ${MODEL_ID}.bin ./ml/models/
-
-COPY requirements.txt ./
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    libopenblas-dev ninja-build build-essential wget \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+#RUN pip install pytest cmake scikit-build setuptools
+RUN pip install --no-cache-dir -r ./requirements.txt
 
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+ARG MODEL_ID
+RUN mkdir -p ./ml/models/
+RUN wget -O ./ml/models/${MODEL_ID}.bin https://prem-models.s3.eu-central-1.amazonaws.com/${MODEL_ID}.bin
 
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
 CMD python main.py
diff --git a/cht-llama-cpp/routes.py b/cht-llama-cpp/routes.py
@@ -94,9 +94,7 @@ async def generate_chunk_based_response(body):
 async def chat_completions(body: ChatCompletionInput):
     try:
         if body.stream:
-            return StreamingResponse(
-                generate_chunk_based_response(body), media_type="text/event-stream"
-            )
+            return StreamingResponse(generate_chunk_based_response(body), media_type="text/event-stream")
         return model.generate(
             messages=body.messages,
             temperature=body.temperature,

diff --git a/cht-llama-v2/build.sh b/cht-llama-v2/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.1
+export VERSION=1.0.2
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-llama-2-7b-gpu       llama-2-7b-hf       ${@:1}