Skip to content
This repository has been archived by the owner on Dec 6, 2023. It is now read-only.

Commit

Permalink
Merge pull request #111 from casperdcl/docker-cache
Browse files (browse the repository at this point in the history)
docker build speedups & memory reduction
  • Loading branch information
casperdcl authored Sep 23, 2023
2 parents (29ed998 + abf9bdf), commit bf42764
Show file tree
Hide file tree
Showing 50 changed files with 202 additions and 371 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: CI
on:
push: {branches: [main]}
pull_request_target:
pull_request_target: {paths: ['*-*/**']}
schedule: [{cron: '0 11 * * 6'}] # M H d m w (Sat 11:00)
jobs:
setup:
Expand All @@ -15,7 +15,6 @@ jobs:
- id: list
name: List modified models
run: |
echo "debug: author_association: ${{ github.event.pull_request.author_association }}"
if test "${{ github.event_name }}" = schedule; then
"modified=$(ls -d *-* | sort | jq -Rsc 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
else
Expand Down
2 changes: 1 addition & 1 deletion a2t-whisper/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.1
export VERSION=1.0.2
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_cpu ghcr.io/premai-io/audio-to-text-whisper-tiny-cpu tiny ${@:1}
Expand Down
30 changes: 8 additions & 22 deletions a2t-whisper/docker/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,31 +1,17 @@
FROM python:3.10-slim-bullseye

ARG MODEL_ID

WORKDIR /usr/src/app/

ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1

RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
git \
wget \
libatlas-base-dev \
ffmpeg

COPY requirements.txt /usr/src/app/

RUN pip3 install --no-cache-dir -r requirements.txt
RUN apt update -qq && apt install -yqq --no-install-recommends \
build-essential git wget libatlas-base-dev ffmpeg \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID
ENV DEVICE=cpu

CMD python3 main.py
CMD python main.py
17 changes: 6 additions & 11 deletions a2t-whisper/docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
FROM huggingface/transformers-pytorch-gpu:latest

ARG MODEL_ID

WORKDIR /usr/src/app/

COPY requirements.txt /usr/src/app/

RUN pip3 install --no-cache-dir -r requirements.txt
RUN cd $(dirname $(which python3)) && ln -s python3 python
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID
ENV DEVICE=cuda

CMD python3 main.py
CMD python main.py
2 changes: 1 addition & 1 deletion cdr-replit/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.0
export VERSION=1.0.1
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_gpu ghcr.io/premai-io/coder-replit-code-v1-3b-gpu replit/replit-code-v1-3b ${@:1}
20 changes: 7 additions & 13 deletions cdr-replit/docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
FROM huggingface/transformers-pytorch-gpu:4.28.1

ARG MODEL_ID

RUN pip install "accelerate>=0.16.0,<1"

WORKDIR /usr/src/app/

COPY requirements.txt ./

RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
RUN cd $(dirname $(which python3)) && ln -s python3 python
COPY requirements.txt .
RUN pip install "accelerate>=0.16.0,<1"
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID

CMD python3 main.py
CMD python main.py
4 changes: 1 addition & 3 deletions cdr-replit/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ def generate(
num_return_sequences=1,
eos_token_id=cls.tokenizer.eos_token_id,
)
return cls.tokenizer.decode(
tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
)
return cls.tokenizer.decode(tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)

@classmethod
def get_model(cls):
Expand Down
2 changes: 1 addition & 1 deletion cdr-t5/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.0
export VERSION=1.0.1
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_cpu ghcr.io/premai-io/coder-codet5p-220m-py-cpu Salesforce/codet5p-220m-py ${@:1}
14 changes: 4 additions & 10 deletions cdr-t5/docker/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,14 @@
FROM python:3.10-slim-bullseye

ARG MODEL_ID

WORKDIR /usr/src/app/

COPY requirements.txt ./

RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID
ENV DEVICE=cpu

CMD python main.py
8 changes: 2 additions & 6 deletions cdr-t5/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,14 @@ def generate(
stop: str = "",
**kwargs,
):
inputs = cls.tokenizer.encode(prompt, return_tensors="pt").to(
os.getenv("DEVICE", "cpu")
)
inputs = cls.tokenizer.encode(prompt, return_tensors="pt").to(os.getenv("DEVICE", "cpu"))
outputs = cls.model.generate(inputs, max_length=max_tokens)
return cls.tokenizer.decode(outputs[0], skip_special_tokens=True)

@classmethod
def get_model(cls):
if cls.model is None:
cls.tokenizer = AutoTokenizer.from_pretrained(
os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py")
)
cls.tokenizer = AutoTokenizer.from_pretrained(os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py"))
cls.model = T5ForConditionalGeneration.from_pretrained(
os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py")
).to(os.getenv("DEVICE", "cpu"))
Expand Down
2 changes: 1 addition & 1 deletion cht-dolly-v2/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.3
export VERSION=1.0.4
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_gpu ghcr.io/premai-io/chat-dolly-v2-12b-gpu databricks/dolly-v2-12b ${@:1}
22 changes: 8 additions & 14 deletions cht-dolly-v2/docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
FROM huggingface/transformers-pytorch-gpu:4.28.1

ARG MODEL_ID

RUN pip install "accelerate>=0.16.0,<1"

WORKDIR /usr/src/app/

COPY requirements.txt ./

RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
RUN cd $(dirname $(which python3)) && ln -s python3 python
COPY requirements.txt .
RUN pip install "accelerate>=0.16.0,<1"
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID
ENV DEVICE=auto

CMD python3 main.py
#ENV DEVICE=auto
CMD python main.py
2 changes: 1 addition & 1 deletion cht-falcon/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.0
export VERSION=1.0.1
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_gpu ghcr.io/premai-io/chat-falcon-7b-instruct-gpu tiiuae/falcon-7b-instruct ${@:1}
17 changes: 6 additions & 11 deletions cht-falcon/docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
FROM huggingface/transformers-pytorch-gpu:4.28.1

ARG MODEL_ID

WORKDIR /usr/src/app/

COPY requirements.txt ./

RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
RUN cd $(dirname $(which python3)) && ln -s python3 python
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID

CMD python3 main.py
CMD python main.py
5 changes: 1 addition & 4 deletions cht-falcon/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@ def __call__(self, input_ids, scores, **kwargs) -> bool:
generated_text = self.tokenizer.decode(input_ids[0])
generated_text = generated_text.replace(self.prompt, "")
# Check if the target sequence appears in the generated text
return any(
target_sequence in generated_text
for target_sequence in self.target_sequences
)
return any(target_sequence in generated_text for target_sequence in self.target_sequences)

def __len__(self) -> int:
return len(self.target_sequences)
Expand Down
2 changes: 1 addition & 1 deletion cht-gorilla/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.0
export VERSION=1.0.1
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_gpu ghcr.io/premai-io/chat-gorilla-falcon-7b-gpu gorilla-llm/gorilla-falcon-7b-hf-v0 ${@:1}
Expand Down
17 changes: 6 additions & 11 deletions cht-gorilla/docker/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
FROM huggingface/transformers-pytorch-gpu:4.28.1

ARG MODEL_ID

WORKDIR /usr/src/app/

COPY requirements.txt ./

RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
RUN cd $(dirname $(which python3)) && ln -s python3 python
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

ARG MODEL_ID
COPY download.py .

RUN python3 download.py --model $MODEL_ID
RUN python download.py --model $MODEL_ID

COPY . .

ENV MODEL_ID=$MODEL_ID

CMD python3 main.py
CMD python main.py
4 changes: 1 addition & 3 deletions cht-gorilla/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ def test_chat_gorilla() -> None:
"/v1/chat/completions",
json={
"model": os.getenv("MODEL_ID", "gorilla-llm/gorilla-falcon-7b-hf-v0"),
"messages": [
{"role": "user", "content": "Generate an image of a cat"}
],
"messages": [{"role": "user", "content": "Generate an image of a cat"}],
},
)
assert response.status_code == 200, response.content
2 changes: 1 addition & 1 deletion cht-llama-cpp/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.4
export VERSION=1.0.5
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_cpu ghcr.io/premai-io/chat-gpt4all-lora-q4-cpu gpt4all-lora-q4 ${@:1}
Expand Down
23 changes: 9 additions & 14 deletions cht-llama-cpp/docker/cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
FROM python:3.10-slim-bullseye

ARG MODEL_ID

RUN apt update && apt install -y libopenblas-dev ninja-build build-essential wget
RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools

WORKDIR /usr/src/app/

RUN wget https://prem-models.s3.eu-central-1.amazonaws.com/${MODEL_ID}.bin
RUN mkdir -p ./ml/models/
RUN mv ${MODEL_ID}.bin ./ml/models/

COPY requirements.txt ./
RUN apt update -qq && apt install -yqq --no-install-recommends \
libopenblas-dev ninja-build build-essential wget \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
#RUN pip install pytest cmake scikit-build setuptools
RUN pip install --no-cache-dir -r ./requirements.txt

RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
ARG MODEL_ID
RUN mkdir -p ./ml/models/
RUN wget -O ./ml/models/${MODEL_ID}.bin https://prem-models.s3.eu-central-1.amazonaws.com/${MODEL_ID}.bin

COPY . .

ENV MODEL_ID=$MODEL_ID

CMD python main.py
4 changes: 1 addition & 3 deletions cht-llama-cpp/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ async def generate_chunk_based_response(body):
async def chat_completions(body: ChatCompletionInput):
try:
if body.stream:
return StreamingResponse(
generate_chunk_based_response(body), media_type="text/event-stream"
)
return StreamingResponse(generate_chunk_based_response(body), media_type="text/event-stream")
return model.generate(
messages=body.messages,
temperature=body.temperature,
Expand Down
2 changes: 1 addition & 1 deletion cht-llama-v2/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e
export VERSION=1.0.1
export VERSION=1.0.2
source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"

build_gpu ghcr.io/premai-io/chat-llama-2-7b-gpu llama-2-7b-hf ${@:1}
Expand Down
Loading

0 comments on commit bf42764

Please sign in to comment.