Build docker files for both CI and User (#219)
* add docker start for user

* add docker start for user

* merge docker file

* fix

* fix deepspeed

* fix

* fix

* fix

* add ray user

* add git

* add git

* fix

* fix

* fix

* fix

* fix

* fix

* fix readme

* fix dockerfile path

* fix re

* Update README.md

Signed-off-by: Xiaochang Wu <[email protected]>

* Update README.md

Signed-off-by: Xiaochang Wu <[email protected]>

* fix

* fix docker file

* fix docker file

* fix

* fix

* fix review

* fix md

* fix md

* fix md

* fix md

* fix rebase

* fix rebase

* fix rebase

* fix rebase

* fix

---------

Signed-off-by: Xiaochang Wu <[email protected]>
Co-authored-by: Xiaochang Wu <[email protected]>
yutianchen666 and xwu99 authored Jul 18, 2024
1 parent 4a646b0 commit cf5d32b
Showing 16 changed files with 260 additions and 11 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/workflow_inference_gaudi2.yml
@@ -88,9 +88,9 @@ jobs:
DF_SUFFIX=".gaudi2"
TARGET=${{steps.target.outputs.target}}
if [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
dockerfile="dev/docker/Dockerfile.habana_vllm"
dockerfile="dev/docker/ci/Dockerfile.habana_vllm"
else
dockerfile="dev/docker/Dockerfile.habana"
dockerfile="dev/docker/ci/Dockerfile.habana"
fi
docker build --build-arg CACHEBUST=1 -f ${dockerfile} -t ${TARGET}:habana .
docker container prune -f
2 changes: 1 addition & 1 deletion .github/workflows/workflow_test_benchmark.yml
@@ -68,7 +68,7 @@ jobs:
run: |
DF_SUFFIX=".vllm"
TARGET=${{steps.target.outputs.target}}
docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ inputs.http_proxy }} --build-arg https_proxy=${{ inputs.https_proxy }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest
docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ inputs.http_proxy }} --build-arg https_proxy=${{ inputs.https_proxy }} -f dev/docker/ci/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest
docker container prune -f
docker image prune -f
66 changes: 65 additions & 1 deletion README.md
@@ -33,7 +33,7 @@ LLM-on-Ray's modular workflow structure is designed to comprehensively cater to
![llm-on-ray](./docs/assets/solution_technical_overview.png)


## Getting Started
## Getting Started Locally With Source Code
This guide will assist you in setting up LLM-on-Ray on Intel CPU locally, covering the initial setup, finetuning models, and deploying them for serving.
### Setup

@@ -109,6 +109,70 @@ After deploying the model endpoint, you can access and test it by using the scri
python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/gpt2
```

## Getting Started With Docker
This guide will assist you in setting up LLM-on-Ray with Docker.

```bash
git clone https://github.com/intel/llm-on-ray.git
cd llm-on-ray
```
The user-facing Dockerfile is `dev/docker/Dockerfile.user`.

#### 1. Source Docker Functions
Detailed Docker parameters can be configured in `dev/scripts/docker-functions.sh`.
```bash
source dev/scripts/docker-functions.sh
```
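For reference, the mount-related defaults below sit at the top of `dev/scripts/docker-functions.sh` (shown in full later in this diff); edit them in that file if your llm-on-ray checkout or Hugging Face cache lives elsewhere.
```bash
# Defaults from dev/scripts/docker-functions.sh -- edit in that file if needed.
code_checkout_path=$PWD                          # host path of the llm-on-ray checkout, mounted at /root/llm-on-ray
model_cache_path=$HOME'/.cache/huggingface/hub'  # host path of the Hugging Face cache, mounted at /root/.cache/huggingface/hub
```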

#### 2. Build Docker Image
The default build targets CPU with DeepSpeed for LLM serving.
```bash
build_docker
```

Change the `build_docker` function's arguments for different environments.

Use vLLM for LLM serving.
```bash
build_docker vllm
```

Use ipex-llm for LLM serving.
```bash
build_docker ipex-llm
```
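Under the hood, `build_docker` is a thin wrapper around `docker build` against `dev/docker/Dockerfile.user` (see `dev/scripts/docker-functions.sh` later in this diff); as a sketch, the vLLM variant expands roughly to:
```bash
# Approximate equivalent of `build_docker vllm`; proxy build args are added
# only when http_proxy/https_proxy are set in your shell.
docker build ./ \
    --build-arg=CACHEBUST=1 \
    --build-arg=DOCKER_NAME=.vllm \
    --build-arg=PYPJ=vllm \
    -f dev/docker/Dockerfile.user \
    -t servingvllm:latest
```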

#### 3. Start Docker
Adjust any settings in `dev/scripts/docker-functions.sh` as needed.

Run Docker with CPU serving the default gpt2 model.
```bash
start_docker
```

Run Docker with CPU serving another supported model.
```bash
start_docker llama-2-7b-chat-hf
```

Run Docker with a different environment and model via `start_docker {environment} {model}`, for example:
```bash
start_docker vllm llama-2-7b-chat-hf
```
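For reference, `start_docker vllm llama-2-7b-chat-hf` expands roughly to the following `docker run` invocation (see `dev/scripts/docker-functions.sh` later in this diff); proxy variables are forwarded only if set in your shell.
```bash
# Approximate equivalent of `start_docker vllm llama-2-7b-chat-hf`.
docker run -ti \
    --name=servingvllm \
    -e model_name=llama-2-7b-chat-hf \
    -e OPENAI_BASE_URL=http://localhost:8000/v1 \
    -e OPENAI_API_KEY=not_a_real_key \
    -v "$PWD":/root/llm-on-ray \
    -v "$HOME/.cache/huggingface/hub":/root/.cache/huggingface/hub \
    servingvllm:latest
```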

#### 4. Start LLM-on-Ray
The model serving port inside the Docker container is mapped to the local host.

Using the requests library:
```bash
python examples/inference/api_server_openai/query_http_requests.py
```

Using the OpenAI SDK:
```bash
python examples/inference/api_server_openai/query_openai_sdk.py
```
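You can also hit the OpenAI-compatible endpoint directly; the sketch below assumes the default gpt2 deployment and the default base URL `http://localhost:8000/v1` used by the container.
```bash
# Minimal smoke test with curl; the model id is assumed to match the served model name.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "gpt2", "messages": [{"role": "user", "content": "Hello!"}]}'
```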

## Documents
The following are detailed guidelines for pretraining, finetuning and serving LLMs in various computing environment.

49 changes: 49 additions & 0 deletions dev/docker/Dockerfile.user
@@ -0,0 +1,49 @@
# syntax=docker/dockerfile:1
FROM ubuntu:22.04

# Define build arguments
ARG DOCKER_NAME=default
ARG PYPJ=default
ENV LANG C.UTF-8

WORKDIR /root/

RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
&& apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

ENV CONDA_DIR /opt/conda
RUN wget --quiet https://github.com/conda-forge/miniforge/releases/download/23.3.1-1/Miniforge3-Linux-x86_64.sh -O ~/miniforge.sh && \
/bin/bash ~/miniforge.sh -b -p /opt/conda
ENV PATH $CONDA_DIR/bin:$PATH

# setup env
SHELL ["/bin/bash", "--login", "-c"]

RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
unset -f conda && \
export PATH=$CONDA_DIR/bin/:${PATH} && \
mamba config --add channels intel && \
mamba install -y -c conda-forge python==3.9 gxx=12.3 gxx_linux-64=12.3 libxcrypt

# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
ARG CACHEBUST=1

RUN git clone https://github.com/intel/llm-on-ray.git
RUN if [ -d "llm-on-ray" ]; then echo "Clone successful"; else echo "Clone failed" && exit 1; fi
WORKDIR /root/llm-on-ray


RUN ls -la

RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[${PYPJ}] --extra-index-url https://download.pytorch.org/whl/cpu \
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/

# Use shell scripting to conditionally install packages
RUN if [ "${DOCKER_NAME}" = ".cpu_and_deepspeed" ]; then ds_report && ./dev/scripts/install-oneapi.sh;fi
RUN if [ "${DOCKER_NAME}" = ".ipex-llm" ]; then ./dev/scripts/install-oneapi.sh; fi
RUN if [ "${DOCKER_NAME}" = ".vllm" ]; then ./dev/scripts/install-vllm-cpu.sh; fi


ENTRYPOINT ["sh", "./dev/scripts/entrypoint_user.sh"]
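If you prefer not to use the helper functions, the image can also be built directly from this Dockerfile; a minimal sketch for the default CPU + DeepSpeed variant, passing the same build arguments `build_docker` would use:
```bash
# Direct build of dev/docker/Dockerfile.user (CPU + DeepSpeed variant).
docker build ./ \
    --build-arg=CACHEBUST=1 \
    --build-arg=DOCKER_NAME=.cpu_and_deepspeed \
    --build-arg=PYPJ=cpu,deepspeed \
    -f dev/docker/Dockerfile.user \
    -t serving:latest
```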
7 changes: 6 additions & 1 deletion dev/docker/README.md
@@ -1 +1,6 @@
Dockerfiles for CI tests. There could be one Dockerfile with ARG declared to distinguish different pip extras. However, ARG will bust cache of 'pip install', which usually takes long time, when build docker image. Instead, we have two almost identical Dockerfiles here to improve CI efficiency.
# Dockerfiles for Users

* `Dockerfile.user` builds the llm-on-ray Docker image for running on Intel CPU.
* `Dockerfile.habana` builds the llm-on-ray Docker image for running on the [Intel Gaudi AI accelerator](https://habana.ai/products/gaudi/).

__NOTICE:__ Dockerfiles in `ci/` are for CI tests only and not intended for daily use.
@@ -39,4 +39,4 @@ RUN ds_report
# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
ARG CACHEBUST=1
COPY ./dev/scripts/install-oneapi.sh /tmp
RUN /tmp/install-oneapi.sh
RUN /tmp/install-oneapi.sh
@@ -36,4 +36,4 @@ RUN ds_report
# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
ARG CACHEBUST=1
COPY ./dev/scripts/install-oneapi.sh /tmp
RUN /tmp/install-oneapi.sh
RUN /tmp/install-oneapi.sh
32 changes: 32 additions & 0 deletions dev/docker/ci/Dockerfile.habana
@@ -0,0 +1,32 @@
FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest

ENV LANG=en_US.UTF-8

WORKDIR /root/llm-on-ray

COPY ./pyproject.toml .
COPY ./MANIFEST.in .

# create llm_on_ray package directory to bypass the following 'pip install -e' command
RUN mkdir ./llm_on_ray

RUN pip install -e . && \
pip install --upgrade-strategy eager optimum[habana] && \
pip install git+https://github.com/HabanaAI/[email protected]

# Optional. Comment out if you are not using UI
COPY ./dev/scripts/install-ui.sh /tmp

RUN /tmp/install-ui.sh

RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
service ssh restart

ENV no_proxy=localhost,127.0.0.1

# Required by DeepSpeed
ENV RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES=1

ENV PT_HPU_LAZY_ACC_PAR_MODE=0

ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
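The Gaudi CI workflow above builds this image with a plain `docker build`; a sketch is below. The `docker run` flags follow the usual Habana container-runtime conventions and are an assumption, not something defined in this repository.
```bash
# Build as in workflow_inference_gaudi2.yml; the tag here is illustrative.
docker build --build-arg CACHEBUST=1 \
    -f dev/docker/ci/Dockerfile.habana \
    -t llm-on-ray:habana .

# Assumed Habana runtime flags for running on Gaudi (not taken from this repo).
docker run -it --runtime=habana \
    -e HABANA_VISIBLE_DEVICES=all \
    --cap-add=sys_nice --net=host --ipc=host \
    llm-on-ray:habana
```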
File renamed without changes.
@@ -37,4 +37,4 @@ RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[ipex-llm] --extr
# Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
ARG CACHEBUST=1
COPY ./dev/scripts/install-oneapi.sh /tmp
RUN /tmp/install-oneapi.sh
RUN /tmp/install-oneapi.sh
File renamed without changes.
@@ -38,4 +38,4 @@ RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu] --extra-ind
# Install vllm-cpu
# Activate base first for loading g++ envs ($CONDA_PREFIX/etc/conda/activate.d/*)
RUN --mount=type=cache,target=/root/.cache/pip \
source /opt/conda/bin/activate base && ./install-vllm-cpu.sh
source /opt/conda/bin/activate base && ./install-vllm-cpu.sh
4 changes: 2 additions & 2 deletions dev/scripts/ci-functions.sh
@@ -26,10 +26,10 @@ build_and_prune() {
fi

echo "Build Docker image and perform cleaning operation"
echo "docker build ./ ${docker_args[@]} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes | docker image prune -f"
echo "docker build ./ ${docker_args[@]} -f dev/docker/ci/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes | docker image prune -f"

# Build Docker image and perform cleaning operation
docker build ./ "${docker_args[@]}" -f dev/docker/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes
docker build ./ "${docker_args[@]}" -f dev/docker/ci/Dockerfile${DF_SUFFIX} -t ${TARGET}:latest && yes | docker container prune && yes
docker image prune -f

}
75 changes: 75 additions & 0 deletions dev/scripts/docker-functions.sh
@@ -0,0 +1,75 @@
#!/bin/bash
set -eo pipefail

# If your model needs HF_TOKEN, modify "model_description.config.use_auth_token" in the model's config file, e.g. "llm_on_ray/inference/models/llama-2-7b-chat-hf.yaml"
# Mount your own llm-on-ray directory here
code_checkout_path=$PWD
# Mount your own huggingface cache path here
model_cache_path=$HOME'/.cache/huggingface/hub'
MODEL_CACHE_PATH_LOCAL='/root/.cache/huggingface/hub'
CODE_CHECKOUT_PATH_LOCAL='/root/llm-on-ray'


build_docker() {
local DOCKER_NAME=$1

docker_args=()
docker_args+=("--build-arg=CACHEBUST=1")
if [ "$DOCKER_NAME" == "vllm" ]; then
docker_args+=("--build-arg=DOCKER_NAME=".vllm"")
docker_args+=("--build-arg=PYPJ="vllm"")
elif [ "$DOCKER_NAME" == "ipex-llm" ]; then
docker_args+=("--build-arg=DOCKER_NAME=".ipex-llm"")
docker_args+=("--build-arg=PYPJ="ipex-llm"")
else
docker_args+=("--build-arg=DOCKER_NAME=".cpu_and_deepspeed"")
docker_args+=("--build-arg=PYPJ="cpu,deepspeed"")
fi

if [ -n "$http_proxy" ]; then
docker_args+=("--build-arg=http_proxy=$http_proxy")
fi

if [ -n "$https_proxy" ]; then
docker_args+=("--build-arg=https_proxy=$http_proxy")
fi


echo "Build Docker image and perform cleaning operation"
echo "docker build ./ ${docker_args[@]} -f dev/docker/Dockerfile.user -t serving${DOCKER_NAME}:latest"

# Build Docker image and perform cleaning operation
docker build ./ "${docker_args[@]}" -f dev/docker/Dockerfile.user -t serving${DOCKER_NAME}:latest

}

start_docker() {
local DOCKER_NAME=$1
local MODEL_NAME=$2

docker_args=()
docker_args+=("--name=serving${DOCKER_NAME}" )
if [ -z "$MODEL_NAME" ]; then
echo "use default model"
else
docker_args+=("-e=model_name=${MODEL_NAME}")
fi

if [ -n "$http_proxy" ]; then
docker_args+=("-e=http_proxy=$http_proxy")
fi

if [ -n "$https_proxy" ]; then
docker_args+=("-e=https_proxy=$http_proxy")
fi

docker_args+=("-e=OPENAI_BASE_URL=${OPENAI_BASE_URL:-http://localhost:8000/v1}")
docker_args+=("-e=OPENAI_API_KEY=${OPENAI_API_KEY:-not_a_real_key}")

# If you need to use a modified llm-on-ray repository or the Hugging Face model cache, enable the corresponding mount below
docker_args+=("-v=$code_checkout_path:${CODE_CHECKOUT_PATH_LOCAL}")
docker_args+=("-v=${model_cache_path}:${MODEL_CACHE_PATH_LOACL}")

echo "docker run -ti ${docker_args[@]} serving${DOCKER_NAME}:latest"
docker run -ti "${docker_args[@]}" serving${DOCKER_NAME}:latest
}
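Taken together, a typical session with these helpers mirrors the README section added in this PR:
```bash
# End-to-end usage sketch of the helper functions above.
source dev/scripts/docker-functions.sh
build_docker vllm                        # builds servingvllm:latest from dev/docker/Dockerfile.user
start_docker vllm llama-2-7b-chat-hf     # runs the container; model_name is read by the entrypoint
```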
24 changes: 24 additions & 0 deletions dev/scripts/entrypoint_user.sh
@@ -0,0 +1,24 @@
#!/bin/bash
set -e

# Default serve cmd
if ! pgrep -f 'ray'; then
echo "Ray is not running. Starting Ray..."
# start Ray
ray start --head
echo "Ray started."
else
echo "Ray is already running."
fi
# Prepare for openai related
pip install "openai>=1.0"

if [ -n "$model_name" ]; then
echo "Using User Model: $model_name"
llm_on_ray-serve --models $model_name --keep_serve_terminal
else
echo "Using Default Model: gpt2"
llm_on_ray-serve --config_file llm_on_ray/inference/models/gpt2.yaml --keep_serve_terminal
fi

exec /bin/bash
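The `model_name` branch above is driven by the environment variable that `start_docker` injects; as a sketch, running the container manually with the same contract looks like this (the image tag assumes the default `build_docker` build).
```bash
# The entrypoint selects the model via the model_name environment variable.
docker run -ti -e model_name=llama-2-7b-chat-hf serving:latest
```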
