diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2a54341..7c54103 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,7 +1,7 @@
 name: CI
 on:
   push: {branches: [main]}
-  pull_request_target:
+  pull_request_target: {paths: ['*-*/**']}
   schedule: [{cron: '0 11 * * 6'}]  # M H d m w (Sat 11:00)
 jobs:
   setup:
@@ -15,7 +15,6 @@ jobs:
     - id: list
       name: List modified models
       run: |
-        echo "debug: author_association: ${{ github.event.pull_request.author_association }}"
         if test "${{ github.event_name }}" = schedule; then
           echo "modified=$(ls -d *-* | sort | jq -Rsc 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
         else
diff --git a/a2t-whisper/build.sh b/a2t-whisper/build.sh
index 2ae4aab..d0f7b4f 100755
--- a/a2t-whisper/build.sh
+++ b/a2t-whisper/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.1
+export VERSION=1.0.2
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/audio-to-text-whisper-tiny-cpu tiny ${@:1}
diff --git a/a2t-whisper/docker/cpu/Dockerfile b/a2t-whisper/docker/cpu/Dockerfile
index 6128016..163e2f9 100644
--- a/a2t-whisper/docker/cpu/Dockerfile
+++ b/a2t-whisper/docker/cpu/Dockerfile
@@ -1,31 +1,17 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-ENV PYTHONDONTWRITEBYTECODE 1
-ENV PYTHONUNBUFFERED 1
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    git \
-    wget \
-    libatlas-base-dev \
-    ffmpeg
-
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    build-essential git wget libatlas-base-dev ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cpu
-
-CMD python3 main.py
+CMD python main.py
diff --git a/a2t-whisper/docker/gpu/Dockerfile b/a2t-whisper/docker/gpu/Dockerfile
index f190d5e..2d89f63 100644
--- a/a2t-whisper/docker/gpu/Dockerfile
+++ b/a2t-whisper/docker/gpu/Dockerfile
@@ -1,20 +1,15 @@
 FROM huggingface/transformers-pytorch-gpu:latest
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cuda
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cdr-replit/build.sh b/cdr-replit/build.sh
index ed25682..04a99be 100755
--- a/cdr-replit/build.sh
+++ b/cdr-replit/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/coder-replit-code-v1-3b-gpu replit/replit-code-v1-3b ${@:1}
diff --git a/cdr-replit/docker/gpu/Dockerfile b/cdr-replit/docker/gpu/Dockerfile
index ff29261..8f52f45 100644
--- a/cdr-replit/docker/gpu/Dockerfile
+++ b/cdr-replit/docker/gpu/Dockerfile
@@ -1,21 +1,15 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
-RUN pip install "accelerate>=0.16.0,<1"
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install "accelerate>=0.16.0,<1"
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cdr-replit/models.py b/cdr-replit/models.py
index 24c7c0b..812ce8b 100644
--- a/cdr-replit/models.py
+++ b/cdr-replit/models.py
@@ -26,9 +26,7 @@ def generate(
             num_return_sequences=1,
             eos_token_id=cls.tokenizer.eos_token_id,
         )
-        return cls.tokenizer.decode(
-            tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
+        return cls.tokenizer.decode(tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
 
     @classmethod
     def get_model(cls):
diff --git a/cdr-t5/build.sh b/cdr-t5/build.sh
index 57d5c25..bf862be 100755
--- a/cdr-t5/build.sh
+++ b/cdr-t5/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/coder-codet5p-220m-py-cpu Salesforce/codet5p-220m-py ${@:1}
diff --git a/cdr-t5/docker/cpu/Dockerfile b/cdr-t5/docker/cpu/Dockerfile
index 7e8da1c..15db572 100644
--- a/cdr-t5/docker/cpu/Dockerfile
+++ b/cdr-t5/docker/cpu/Dockerfile
@@ -1,20 +1,14 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cpu
-
 CMD python main.py
diff --git a/cdr-t5/models.py b/cdr-t5/models.py
index e468064..427b843 100644
--- a/cdr-t5/models.py
+++ b/cdr-t5/models.py
@@ -15,18 +15,14 @@ def generate(
         stop: str = "",
         **kwargs,
     ):
-        inputs = cls.tokenizer.encode(prompt, return_tensors="pt").to(
-            os.getenv("DEVICE", "cpu")
-        )
+        inputs = cls.tokenizer.encode(prompt, return_tensors="pt").to(os.getenv("DEVICE", "cpu"))
         outputs = cls.model.generate(inputs, max_length=max_tokens)
         return cls.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     @classmethod
     def get_model(cls):
         if cls.model is None:
-            cls.tokenizer = AutoTokenizer.from_pretrained(
-                os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py")
-            )
+            cls.tokenizer = AutoTokenizer.from_pretrained(os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py"))
             cls.model = T5ForConditionalGeneration.from_pretrained(
                 os.getenv("MODEL_ID", "Salesforce/codet5p-220m-py")
             ).to(os.getenv("DEVICE", "cpu"))
diff --git a/cht-dolly-v2/build.sh b/cht-dolly-v2/build.sh
index 0628bcc..94d04b2 100755
--- a/cht-dolly-v2/build.sh
+++ b/cht-dolly-v2/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.3
+export VERSION=1.0.4
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-dolly-v2-12b-gpu databricks/dolly-v2-12b ${@:1}
diff --git a/cht-dolly-v2/docker/gpu/Dockerfile b/cht-dolly-v2/docker/gpu/Dockerfile
index 008ebc0..e90ec37 100644
--- a/cht-dolly-v2/docker/gpu/Dockerfile
+++ b/cht-dolly-v2/docker/gpu/Dockerfile
@@ -1,22 +1,16 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
-RUN pip install "accelerate>=0.16.0,<1"
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install "accelerate>=0.16.0,<1"
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-ENV DEVICE=auto
-
-CMD python3 main.py
+#ENV DEVICE=auto
+CMD python main.py
diff --git a/cht-falcon/build.sh b/cht-falcon/build.sh
index 8e8cbad..bd4e4d9 100755
--- a/cht-falcon/build.sh
+++ b/cht-falcon/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-falcon-7b-instruct-gpu tiiuae/falcon-7b-instruct ${@:1}
diff --git a/cht-falcon/docker/gpu/Dockerfile b/cht-falcon/docker/gpu/Dockerfile
index 61696ca..bc7918b 100644
--- a/cht-falcon/docker/gpu/Dockerfile
+++ b/cht-falcon/docker/gpu/Dockerfile
@@ -1,19 +1,14 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cht-falcon/utils.py b/cht-falcon/utils.py
index 5d726a2..a2494e6 100644
--- a/cht-falcon/utils.py
+++ b/cht-falcon/utils.py
@@ -16,10 +16,7 @@ def __call__(self, input_ids, scores, **kwargs) -> bool:
         generated_text = self.tokenizer.decode(input_ids[0])
         generated_text = generated_text.replace(self.prompt, "")
         # Check if the target sequence appears in the generated text
-        return any(
-            target_sequence in generated_text
-            for target_sequence in self.target_sequences
-        )
+        return any(target_sequence in generated_text for target_sequence in self.target_sequences)
 
     def __len__(self) -> int:
         return len(self.target_sequences)
diff --git a/cht-gorilla/build.sh b/cht-gorilla/build.sh
index 632b99f..190207b 100755
--- a/cht-gorilla/build.sh
+++ b/cht-gorilla/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-gorilla-falcon-7b-gpu gorilla-llm/gorilla-falcon-7b-hf-v0 ${@:1}
diff --git a/cht-gorilla/docker/gpu/Dockerfile b/cht-gorilla/docker/gpu/Dockerfile
index 61696ca..bc7918b 100644
--- a/cht-gorilla/docker/gpu/Dockerfile
+++ b/cht-gorilla/docker/gpu/Dockerfile
@@ -1,19 +1,14 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cht-gorilla/tests/test_views.py b/cht-gorilla/tests/test_views.py
index 1ec8b17..10c7a6e 100644
--- a/cht-gorilla/tests/test_views.py
+++ b/cht-gorilla/tests/test_views.py
@@ -11,9 +11,7 @@ def test_chat_gorilla() -> None:
         "/v1/chat/completions",
         json={
            "model": os.getenv("MODEL_ID", "gorilla-llm/gorilla-falcon-7b-hf-v0"),
-            "messages": [
-                {"role": "user", "content": "Generate an image of a cat"}
-            ],
+            "messages": [{"role": "user", "content": "Generate an image of a cat"}],
         },
     )
     assert response.status_code == 200, response.content
diff --git a/cht-llama-cpp/build.sh b/cht-llama-cpp/build.sh
index f844ac7..fb78f4b 100755
--- a/cht-llama-cpp/build.sh
+++ b/cht-llama-cpp/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.4
+export VERSION=1.0.5
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/chat-gpt4all-lora-q4-cpu gpt4all-lora-q4 ${@:1}
diff --git a/cht-llama-cpp/docker/cpu/Dockerfile b/cht-llama-cpp/docker/cpu/Dockerfile
index 14a31cb..a604b8e 100644
--- a/cht-llama-cpp/docker/cpu/Dockerfile
+++ b/cht-llama-cpp/docker/cpu/Dockerfile
@@ -1,22 +1,17 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
-RUN apt update && apt install -y libopenblas-dev ninja-build build-essential wget
-RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools
-
 WORKDIR /usr/src/app/
-RUN wget https://prem-models.s3.eu-central-1.amazonaws.com/${MODEL_ID}.bin
-RUN mkdir -p ./ml/models/
-RUN mv ${MODEL_ID}.bin ./ml/models/
-
-COPY requirements.txt ./
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    libopenblas-dev ninja-build build-essential wget \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+#RUN pip install pytest cmake scikit-build setuptools
+RUN pip install --no-cache-dir -r ./requirements.txt
 
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+ARG MODEL_ID
+RUN mkdir -p ./ml/models/
+RUN wget -O ./ml/models/${MODEL_ID}.bin https://prem-models.s3.eu-central-1.amazonaws.com/${MODEL_ID}.bin
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
 CMD python main.py
diff --git a/cht-llama-cpp/routes.py b/cht-llama-cpp/routes.py
index 5e47307..ff84caa 100644
--- a/cht-llama-cpp/routes.py
+++ b/cht-llama-cpp/routes.py
@@ -94,9 +94,7 @@ async def generate_chunk_based_response(body):
 async def chat_completions(body: ChatCompletionInput):
     try:
         if body.stream:
-            return StreamingResponse(
-                generate_chunk_based_response(body), media_type="text/event-stream"
-            )
+            return StreamingResponse(generate_chunk_based_response(body), media_type="text/event-stream")
         return model.generate(
             messages=body.messages,
             temperature=body.temperature,
diff --git a/cht-llama-v2/build.sh b/cht-llama-v2/build.sh
index 935850a..8c2d6ba 100755
--- a/cht-llama-v2/build.sh
+++ b/cht-llama-v2/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.1
+export VERSION=1.0.2
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-llama-2-7b-gpu llama-2-7b-hf ${@:1}
diff --git a/cht-llama-v2/docker/gpu/Dockerfile b/cht-llama-v2/docker/gpu/Dockerfile
index 50b61a8..5e2f95f 100644
--- a/cht-llama-v2/docker/gpu/Dockerfile
+++ b/cht-llama-v2/docker/gpu/Dockerfile
@@ -1,19 +1,17 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
-RUN apt install -y wget unzip
-
 WORKDIR /usr/src/app/
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    wget unzip \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+ARG MODEL_ID
 RUN wget https://prem-models.s3.eu-central-1.amazonaws.com/llama-v2/${MODEL_ID}.zip
 RUN unzip -o ${MODEL_ID}.zip && rm ${MODEL_ID}.zip
 COPY . .
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
-
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cht-llama-v2/utils.py b/cht-llama-v2/utils.py
index b837703..981352e 100644
--- a/cht-llama-v2/utils.py
+++ b/cht-llama-v2/utils.py
@@ -16,10 +16,7 @@ def __call__(self, input_ids, scores, **kwargs) -> bool:
         generated_text = self.tokenizer.decode(input_ids[0])
         generated_text = generated_text.replace(self.prompt, "")
         # Check if the target sequence appears in the generated text
-        return any(
-            target_sequence in generated_text
-            for target_sequence in self.target_sequences
-        )
+        return any(target_sequence in generated_text for target_sequence in self.target_sequences)
 
     def __len__(self) -> int:
         return len(self.target_sequences)
diff --git a/cht-mpt/build.sh b/cht-mpt/build.sh
index c620ac9..88da6d5 100755
--- a/cht-mpt/build.sh
+++ b/cht-mpt/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-mpt-7b-gpu mosaicml/mpt-7b-chat ${@:1}
diff --git a/cht-mpt/docker/gpu/Dockerfile b/cht-mpt/docker/gpu/Dockerfile
index 61696ca..bc7918b 100644
--- a/cht-mpt/docker/gpu/Dockerfile
+++ b/cht-mpt/docker/gpu/Dockerfile
@@ -1,19 +1,14 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/cht-mpt/utils.py b/cht-mpt/utils.py
index 9603b8a..d2253ed 100644
--- a/cht-mpt/utils.py
+++ b/cht-mpt/utils.py
@@ -16,10 +16,7 @@ def __call__(self, input_ids, scores, **kwargs) -> bool:
         generated_text = self.tokenizer.decode(input_ids[0])
         generated_text = generated_text.replace(self.prompt, "")
         # Check if the target sequence appears in the generated text
-        return any(
-            target_sequence in generated_text
-            for target_sequence in self.target_sequences
-        )
+        return any(target_sequence in generated_text for target_sequence in self.target_sequences)
 
     def __len__(self) -> int:
         return len(self.target_sequences)
diff --git a/cht-petals/build.sh b/cht-petals/build.sh
index 56036dd..87c6491 100755
--- a/cht-petals/build.sh
+++ b/cht-petals/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.1
+export VERSION=1.0.2
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/chat-stable-beluga-2-cpu petals-team/StableBeluga2 ${@:1}
diff --git a/cht-petals/docker/cpu/Dockerfile b/cht-petals/docker/cpu/Dockerfile
index 0e1c0ee..6cfb347 100644
--- a/cht-petals/docker/cpu/Dockerfile
+++ b/cht-petals/docker/cpu/Dockerfile
@@ -1,18 +1,17 @@
 FROM python:3.10-slim-bullseye
-
-RUN apt update && apt install -y libopenblas-dev ninja-build build-essential wget git
-RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    libopenblas-dev ninja-build build-essential wget git \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+#RUN pip install pytest cmake scikit-build setuptools
+RUN pip install --no-cache-dir -r requirements.txt
 
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
-
-COPY download.py .
 ARG MODEL_ID
-ENV MODEL_ID=$MODEL_ID
-RUN python3 download.py --model $MODEL_ID
+COPY download.py .
+RUN python download.py --model $MODEL_ID
 COPY . .
+ENV MODEL_ID=$MODEL_ID
 
 CMD python main.py
diff --git a/cht-petals/models.py b/cht-petals/models.py
index 583a7a2..8e83a15 100644
--- a/cht-petals/models.py
+++ b/cht-petals/models.py
@@ -58,17 +58,11 @@ def generate(
     @classmethod
     def get_model(cls):
         if cls.model is None:
-            Tokenizer = (
-                LlamaTokenizer
-                if "llama" in os.getenv("MODEL_ID").lower()
-                else AutoTokenizer
-            )
+            Tokenizer = LlamaTokenizer if "llama" in os.getenv("MODEL_ID").lower() else AutoTokenizer
             cls.tokenizer = Tokenizer.from_pretrained(os.getenv("MODEL_ID"))
 
             kwargs = {}
             if "x86_64" in machine():
                 kwargs["torch_dtype"] = torch.float32
-            cls.model = AutoDistributedModelForCausalLM.from_pretrained(
-                os.getenv("MODEL_ID"), **kwargs
-            )
+            cls.model = AutoDistributedModelForCausalLM.from_pretrained(os.getenv("MODEL_ID"), **kwargs)
             return cls.model
diff --git a/cht-xgen/build.sh b/cht-xgen/build.sh
index e6c7c66..71f583b 100755
--- a/cht-xgen/build.sh
+++ b/cht-xgen/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/chat-xgen-7b-8k-inst-gpu Salesforce/xgen-7b-8k-inst ${@:1}
diff --git a/cht-xgen/docker/gpu/Dockerfile b/cht-xgen/docker/gpu/Dockerfile
index 61696ca..bc7918b 100644
--- a/cht-xgen/docker/gpu/Dockerfile
+++ b/cht-xgen/docker/gpu/Dockerfile
@@ -1,19 +1,14 @@
 FROM huggingface/transformers-pytorch-gpu:4.28.1
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
-
-CMD python3 main.py
+CMD python main.py
diff --git a/dfs-dalle/build.sh b/dfs-dalle/build.sh
index e41e7a3..e0d82bc 100755
--- a/dfs-dalle/build.sh
+++ b/dfs-dalle/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.1.0
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/diffuser-dalle-mini-gpu - ${@:1} \
diff --git a/dfs-dalle/docker/gpu/Dockerfile b/dfs-dalle/docker/gpu/Dockerfile
index f39cec5..ebc0c0a 100644
--- a/dfs-dalle/docker/gpu/Dockerfile
+++ b/dfs-dalle/docker/gpu/Dockerfile
@@ -1,37 +1,38 @@
-FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 as build
+WORKDIR /usr/src/app/
+
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    git python3 python3-dev python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt \
+    jax[cuda]==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 
-RUN apt-get update && apt-get install -y \
-    git \
-    python3 \
-    python3-pip \
-    && rm -rf /var/lib/apt/lists/*
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
+WORKDIR /usr/src/app/
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    git python3 \
+    && rm -rf /var/lib/apt/lists/*
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY --from=build --link /usr/local/lib/python3.10 /usr/local/lib/python3.10
+COPY --from=build --link /usr/lib/python3.10 /usr/lib/python3.10
+COPY --from=build --link /usr/lib/python3 /usr/lib/python3
 
 ARG DALLE_MODEL_ID
 ARG DALLE_REVISION_ID
-
 ARG VQGAN_MODEL_ID
 ARG VQGAN_REVISION_ID
-
-WORKDIR /usr/src/app/
-
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
-RUN pip3 install jax[cuda11_cudnn82]==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-
 COPY download.py .
-
-RUN python3 download.py --dalle-model $DALLE_MODEL_ID --dalle-revision $DALLE_REVISION_ID \
+RUN python download.py \
+    --dalle-model $DALLE_MODEL_ID --dalle-revision $DALLE_REVISION_ID \
     --vqgan-model $VQGAN_MODEL_ID --vqgan-revision $VQGAN_REVISION_ID
 COPY . .
-
 ENV DALLE_MODEL_ID=$DALLE_MODEL_ID
 ENV DALLE_REVISION_ID=$DALLE_REVISION_ID
 ENV VQGAN_MODEL_ID=$VQGAN_MODEL_ID
 ENV VQGAN_REVISION_ID=$VQGAN_REVISION_ID
-
 ENV DEVICE=gpu
-
-CMD python3 main.py
+CMD python main.py
diff --git a/dfs-dalle/download.py b/dfs-dalle/download.py
index 14c23b7..a03f009 100644
--- a/dfs-dalle/download.py
+++ b/dfs-dalle/download.py
@@ -7,13 +7,9 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--dalle-model", help="Dalle Model to download")
-parser.add_argument(
-    "--dalle-revision", help="Dalle Revision to download", default=None, required=False
-)
+parser.add_argument("--dalle-revision", help="Dalle Revision to download", default=None, required=False)
 parser.add_argument("--vqgan-model", help="VQGAN Model to download")
-parser.add_argument(
-    "--vqgan-revision", help="VQGAN Revision to download", default=None, required=False
-)
+parser.add_argument("--vqgan-revision", help="VQGAN Revision to download", default=None, required=False)
 args = parser.parse_args()
 
 jax.local_device_count()
diff --git a/dfs-dalle/models.py b/dfs-dalle/models.py
index 6a4df28..664c0a0 100644
--- a/dfs-dalle/models.py
+++ b/dfs-dalle/models.py
@@ -67,9 +67,7 @@
             img = Image.fromarray(np.asarray(decoded_images[0] * 255, dtype=np.uint8))
             buffered = io.BytesIO()
             img.save(buffered, format="PNG")
-            data.append(
-                {response_format: base64.b64encode(buffered.getvalue()).decode("utf-8")}
-            )
+            data.append({response_format: base64.b64encode(buffered.getvalue()).decode("utf-8")})
 
         return data
 
@@ -78,9 +76,7 @@ def get_model(cls):
         jax.local_device_count()
 
         @partial(jax.pmap, axis_name="batch", static_broadcasted_argnums=(3, 4, 5, 6))
-        def p_generate(
-            tokenized_prompt, key, params, top_k, top_p, temperature, condition_scale
-        ):
+        def p_generate(tokenized_prompt, key, params, top_k, top_p, temperature, condition_scale):
             return cls.model.generate(
                 **tokenized_prompt,
                 prng_key=key,
@@ -107,9 +103,7 @@ def p_decode(indices, params):
         )
         cls.decoder, vqgan_params = VQModel.from_pretrained(
             os.getenv("VQGAN_MODEL_ID", "dalle-mini/vqgan_imagenet_f16_16384"),
-            revision=os.getenv(
-                "VQGAN_REVISION_ID", "e93a26e7707683d349bf5d5c41c5b0ef69b677a9"
-            ),
+            revision=os.getenv("VQGAN_REVISION_ID", "e93a26e7707683d349bf5d5c41c5b0ef69b677a9"),
             _do_init=False,
         )
         cls.processor = DalleBartProcessor.from_pretrained(
diff --git a/dfs-dalle/requirements.txt b/dfs-dalle/requirements.txt
index 575959e..648ad1a 100644
--- a/dfs-dalle/requirements.txt
+++ b/dfs-dalle/requirements.txt
@@ -1,13 +1,13 @@
-fastapi==0.95.0
-uvicorn==0.21.1
-pytest==7.2.2
-requests==2.28.2
-tqdm==4.65.0
-httpx==0.23.3
+fastapi==0.103.1
+uvicorn==0.23.2
+pytest==7.4.2
+requests==2.31.0
+tqdm==4.66.1
+httpx==0.25.0
 python-dotenv==1.0.0
-tenacity==8.2.2
-dalle-mini==0.1.4
+tenacity==8.2.3
+dalle-mini==0.1.5
 git+https://github.com/patil-suraj/vqgan-jax.git
-jaxlib==0.3.25
+jaxlib
 flax==0.6.3
 transformers==4.25.1
diff --git a/dfs-diffusers/build.sh b/dfs-diffusers/build.sh
index 41a1f05..e086542 100755
--- a/dfs-diffusers/build.sh
+++ b/dfs-diffusers/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.3
+export VERSION=1.0.4
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_gpu ghcr.io/premai-io/diffuser-stable-diffusion-2-1-gpu stabilityai/stable-diffusion-2-1 ${@:1}
diff --git a/dfs-diffusers/docker/gpu/Dockerfile b/dfs-diffusers/docker/gpu/Dockerfile
index bb8a63f..a2ed439 100644
--- a/dfs-diffusers/docker/gpu/Dockerfile
+++ b/dfs-diffusers/docker/gpu/Dockerfile
@@ -1,22 +1,17 @@
 FROM huggingface/transformers-pytorch-gpu:latest
-
-ARG MODEL_ID
-ARG REFINER_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
+ARG REFINER_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID --refiner "$REFINER_ID"
+RUN python download.py --model $MODEL_ID --refiner "$REFINER_ID"
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV REFINER_ID=$REFINER_ID
 ENV DEVICE=cuda
-
-CMD python3 main.py
+CMD python main.py
diff --git a/dfs-diffusers/download.py b/dfs-diffusers/download.py
index de68de4..fc7f3c7 100644
--- a/dfs-diffusers/download.py
+++ b/dfs-diffusers/download.py
@@ -1,11 +1,7 @@
 import argparse
 
 import torch
-from diffusers import (
-    DiffusionPipeline,
-    StableDiffusionLatentUpscalePipeline,
-    StableDiffusionPipeline,
-)
+from diffusers import DiffusionPipeline, StableDiffusionLatentUpscalePipeline, StableDiffusionPipeline
 from tenacity import retry, stop_after_attempt, wait_fixed
 
 parser = argparse.ArgumentParser()
@@ -19,12 +15,8 @@
 
 @retry(stop=stop_after_attempt(3), wait=wait_fixed(5))
 def download_model():
     if "latent" not in args.model:
-        _ = StableDiffusionPipeline.from_pretrained(
-            args.model, torch_dtype=torch.float16
-        )
-        _ = StableDiffusionLatentUpscalePipeline.from_pretrained(
-            args.model, torch_dtype=torch.float16
-        )
+        _ = StableDiffusionPipeline.from_pretrained(args.model, torch_dtype=torch.float16)
+        _ = StableDiffusionLatentUpscalePipeline.from_pretrained(args.model, torch_dtype=torch.float16)
     if args.refiner:
         _ = DiffusionPipeline.from_pretrained(args.refiner, torch_dtype=torch.float16)
diff --git a/dfs-diffusers/models.py b/dfs-diffusers/models.py
index 95a7ffe..1712303 100644
--- a/dfs-diffusers/models.py
+++ b/dfs-diffusers/models.py
@@ -49,9 +49,7 @@
         if image:
             init_image = Image.open(io.BytesIO(image.file.read())).convert("RGB")
             init_image = (
-                init_image.resize(tuple(map(int, (size.split("x")))))
-                if size
-                else init_image
+                init_image.resize(tuple(map(int, (size.split("x"))))) if size else init_image
             )  # breaks e.g 512x512 -> (512, 512)
             model_fn = partial(model_fn, image=init_image)
         images = model_fn(output_type="latent" if cls.refiner_model else "pil").images
@@ -63,9 +61,7 @@
             buffered = io.BytesIO()
             img = img.resize(tuple(map(int, (size.split("x"))))) if size else img
             img.save(buffered, format="PNG")
-            data.append(
-                {response_format: base64.b64encode(buffered.getvalue()).decode()}
-            )
+            data.append({response_format: base64.b64encode(buffered.getvalue()).decode()})
         return data
 
     @classmethod
@@ -85,11 +81,7 @@ def upscale(
         # size = "300x300"
         init_image = Image.open(io.BytesIO(image.file.read())).convert("RGB")
         # print("size found:", init_image.)
-        init_image = (
-            init_image.resize(tuple(map(int, (size.split("x")))))
-            if size
-            else init_image
-        )
+        init_image = init_image.resize(tuple(map(int, (size.split("x"))))) if size else init_image
         images = cls.upscaler_model(
             prompt=prompt,
             image=init_image,
@@ -105,9 +97,7 @@
             buffered = io.BytesIO()
             img = img.resize(tuple(map(int, (size.split("x"))))) if size else img
             img.save(buffered, format="PNG")
-            data.append(
-                {response_format: base64.b64encode(buffered.getvalue()).decode()}
-            )
+            data.append({response_format: base64.b64encode(buffered.getvalue()).decode()})
         return data
 
     @classmethod
@@ -116,11 +106,9 @@ def get_model(cls):
         model_id = os.getenv("MODEL_ID", "stabilityai/stable-diffusion-2-1")
         print("set text img model: ", model_id)
         if "latent" in model_id:
-            cls.upscaler_model = (
-                StableDiffusionLatentUpscalePipeline.from_pretrained(
-                    model_id, torch_dtype=torch.float16
-                ).to(os.getenv("DEVICE", "cpu"))
-            )
+            cls.upscaler_model = StableDiffusionLatentUpscalePipeline.from_pretrained(
+                model_id, torch_dtype=torch.float16
+            ).to(os.getenv("DEVICE", "cpu"))
             cls.upscaler_model.enable_attention_slicing()
             return cls.upscaler_model
         elif "xl" in model_id:
@@ -143,14 +131,10 @@
         cls.text_img_model = cls.text_img_model.to(os.getenv("DEVICE", "cpu"))
         cls.text_img_model.enable_attention_slicing()
-        cls.img_img_model = StableDiffusionImg2ImgPipeline(
-            **cls.text_img_model.components
-        )
+        cls.img_img_model = StableDiffusionImg2ImgPipeline(**cls.text_img_model.components)
 
         cls.upscaler_model = StableDiffusionUpscalePipeline(
             **cls.text_img_model.components,
-            low_res_scheduler=DDPMScheduler.from_config(
-                cls.text_img_model.scheduler.config
-            ),
+            low_res_scheduler=DDPMScheduler.from_config(cls.text_img_model.scheduler.config),
         )
 
         return cls.text_img_model
diff --git a/dfs-diffusers/routes.py b/dfs-diffusers/routes.py
index f2fcb95..2f62baf 100644
--- a/dfs-diffusers/routes.py
+++ b/dfs-diffusers/routes.py
@@ -22,9 +22,7 @@ def as_form(cls: Type[BaseModel]):
             inspect.Parameter(
                 model_field.alias,
                 inspect.Parameter.POSITIONAL_ONLY,
-                default=Form(...)
-                if model_field.required
-                else Form(model_field.default),
+                default=Form(...) if model_field.required else Form(model_field.default),
                 annotation=model_field.outer_type_,
             )
         )
diff --git a/ebd-all-minilm/build.sh b/ebd-all-minilm/build.sh
index b0af3de..c112287 100755
--- a/ebd-all-minilm/build.sh
+++ b/ebd-all-minilm/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.3
+export VERSION=1.0.4
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/embeddings-all-minilm-l6-v2-cpu all-MiniLM-L6-v2 ${@:1}
diff --git a/ebd-all-minilm/docker/cpu/Dockerfile b/ebd-all-minilm/docker/cpu/Dockerfile
index 0f67a9d..d1129c6 100644
--- a/ebd-all-minilm/docker/cpu/Dockerfile
+++ b/ebd-all-minilm/docker/cpu/Dockerfile
@@ -1,30 +1,17 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-ENV PYTHONDONTWRITEBYTECODE 1
-ENV PYTHONUNBUFFERED 1
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    git \
-    wget \
-    libatlas-base-dev
-
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    build-essential git wget libatlas-base-dev \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cpu
-
-CMD python3 main.py
+CMD python main.py
diff --git a/ebd-all-minilm/docker/gpu/Dockerfile b/ebd-all-minilm/docker/gpu/Dockerfile
index 9f86b69..2d89f63 100644
--- a/ebd-all-minilm/docker/gpu/Dockerfile
+++ b/ebd-all-minilm/docker/gpu/Dockerfile
@@ -1,20 +1,15 @@
 FROM huggingface/transformers-pytorch-gpu:latest
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt ./
-
-RUN pip install --no-cache-dir -r ./requirements.txt --upgrade pip
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py --model $MODEL_ID
+RUN python download.py --model $MODEL_ID
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cuda
-
-CMD python3 main.py
+CMD python main.py
diff --git a/ebd-all-minilm/routes.py b/ebd-all-minilm/routes.py
index 7c0c69a..99983fd 100644
--- a/ebd-all-minilm/routes.py
+++ b/ebd-all-minilm/routes.py
@@ -56,9 +56,7 @@ async def embeddings(body: EmbeddingsInput):
     )
 
 
-@router.post(
-    "/engines/text-embedding-ada-002/embeddings", response_model=EmbeddingsResponse
-)
+@router.post("/engines/text-embedding-ada-002/embeddings", response_model=EmbeddingsResponse)
 async def embeddings_openai(body: EmbeddingsInput):
     if len(body.input) > 0 and isinstance(body.input[0], list):
         encoding = tiktoken.model.encoding_for_model("text-embedding-ada-002")
diff --git a/pyproject.toml b/pyproject.toml
index b62b8cf..bd585ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,12 @@
 [tool.flake8]
-max_line_length = 99
+max_line_length = 120
 
 [tool.isort]
 profile = "black"
+line_length = 120
+
+[tool.black]
+line_length = 120
 
 [tool.mypy]
 check_untyped_defs = true
diff --git a/t2a-bark/build.sh b/t2a-bark/build.sh
index 3ff7c52..1fa2e6c 100755
--- a/t2a-bark/build.sh
+++ b/t2a-bark/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 set -e
-export VERSION=1.0.0
+export VERSION=1.0.1
 source "$(dirname "${BASH_SOURCE[0]}")/../utils.sh"
 
 build_cpu ghcr.io/premai-io/text-to-audio-bark-cpu bark/t2a-bark ${@:1}
diff --git a/t2a-bark/docker/cpu/Dockerfile b/t2a-bark/docker/cpu/Dockerfile
index 2bb4d30..c853a20 100644
--- a/t2a-bark/docker/cpu/Dockerfile
+++ b/t2a-bark/docker/cpu/Dockerfile
@@ -1,32 +1,19 @@
 FROM python:3.10-slim-bullseye
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-ENV PYTHONDONTWRITEBYTECODE 1
-ENV PYTHONUNBUFFERED 1
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    git \
-    wget \
-    libatlas-base-dev
-
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN apt update -qq && apt install -yqq --no-install-recommends \
+    build-essential git wget libatlas-base-dev \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py
+RUN python download.py
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cpu
 ENV SUNO_OFFLOAD_CPU=True
 ENV SUNO_USE_SMALL_MODELS=True
-
-CMD python3 main.py
+CMD python main.py
diff --git a/t2a-bark/docker/gpu/Dockerfile b/t2a-bark/docker/gpu/Dockerfile
index f563189..6a40e30 100644
--- a/t2a-bark/docker/gpu/Dockerfile
+++ b/t2a-bark/docker/gpu/Dockerfile
@@ -1,20 +1,15 @@
 FROM huggingface/transformers-pytorch-gpu:latest
-
-ARG MODEL_ID
-
 WORKDIR /usr/src/app/
-COPY requirements.txt /usr/src/app/
-
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN cd $(dirname $(which python3)) && ln -s python3 python
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+ARG MODEL_ID
 COPY download.py .
-
-RUN python3 download.py
+RUN python download.py
 COPY . .
-
 ENV MODEL_ID=$MODEL_ID
 ENV DEVICE=cuda
-
-CMD python3 main.py
+CMD python main.py