Skip to content

Commit

Permalink
update TensorRT-LLM to 0.14.0 (#5)
Browse files Browse the repository at this point in the history
* update TensorRT-LLM to 0.15.0.dev2024102900

* use new apis

* install dependencies using the install scripts from TensorRT-LLM

* better command-line options

* Revert "use new apis"

This reverts commit f7f4c07.

* avoid OOM by building with --max_batch_size 128 (the default batch size is very high)

* fix: ensure the contents of target/release are removed recursively by the --clean option

* update to tensorrt-llm 0.14.0

* update llguidance
  • Loading branch information
mmoskal authored Nov 1, 2024
1 parent ceb95bc commit 6c62d42
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 17 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion TensorRT-LLM
Submodule TensorRT-LLM updated 430 files
13 changes: 10 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nvcr.io/nvidia/tensorrt:24.08-py3 AS llgtrt_dev
FROM nvcr.io/nvidia/tensorrt:24.09-py3 AS llgtrt_dev

RUN apt-get update
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
Expand Down Expand Up @@ -26,9 +26,16 @@ RUN cargo install rustfilt

# install tensorrt_llm packages
RUN mkdir -p /tmp/reqs

# if this fails, run 'git submodule update --init' first
COPY TensorRT-LLM/examples/llama/requirements.txt /tmp/reqs/
RUN cd /tmp/reqs && pip install -r requirements.txt
# COPY TensorRT-LLM/docker/common/install_mpi4py.sh /tmp/reqs/
# COPY TensorRT-LLM/docker/common/install_tensorrt.sh /tmp/reqs/

# RUN bash /tmp/reqs/install_mpi4py.sh
# RUN bash /tmp/reqs/install_tensorrt.sh

RUN cd /tmp/reqs && pip install --upgrade -r requirements.txt

# more packages for this image
RUN pip install \
Expand All @@ -38,7 +45,7 @@ RUN pip install \
pandas matplotlib plotly wheel
RUN pip uninstall -y guidance

RUN pip install --upgrade transformers
# RUN pip install --upgrade transformers

RUN cd /usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs/ && \
ln -s libnvinfer_plugin_tensorrt_llm.so libnvinfer_plugin_tensorrt_llm.so.10
Expand Down
5 changes: 4 additions & 1 deletion docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ cd $(dirname $0)/..
if [ ! -f TensorRT-LLM/README.md ] ; then git submodule update --init ; fi

docker build --progress=plain -t llgtrt/llgtrt:dev --target llgtrt_dev . -f docker/Dockerfile
docker build --progress=plain -t llgtrt/llgtrt:latest --target llgtrt_prod . -f docker/Dockerfile

if [ "$1" != "--dev" ] ; then
docker build --progress=plain -t llgtrt/llgtrt:latest --target llgtrt_prod . -f docker/Dockerfile
fi
10 changes: 6 additions & 4 deletions llgtrt/run.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/bin/bash

# ENGINE=/root/trt-cache/Meta-Llama-3.1-8B-Instruct-engine/
# ENGINE=/root/trt-cache/llama-8b-1tp
# ENGINE=/root/trt-cache/engine-llama3.1-70b-4tp
ENGINE=/root/trt-cache/llama-8b-1tp
if [ -z "$1" ] ; then
ENGINE=${ENGINE:-/root/trt-cache/llama-8b-1tp}
else
ENGINE="$1"
shift
fi

set -e

Expand Down
2 changes: 1 addition & 1 deletion llguidance
2 changes: 1 addition & 1 deletion scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ while test $# -gt 0; do
case "$1" in
--clean)
rm -rf trtllm-c/build
rm target/release/* 2>/dev/null || :
rm -rf target/release/* 2>/dev/null || :
shift
;;
*)
Expand Down
9 changes: 5 additions & 4 deletions scripts/trtbld.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ set -e
cd $(dirname $0)/..
SELF=./scripts/trtbld.sh

CACHE=/root/trt-cache
MODEL=Meta-Llama-3.1-8B-Instruct
CACHE=${CACHE:-/root/trt-cache}
MODEL=${MODEL:-Meta-Llama-3.1-8B-Instruct}
LLAMA_EXAMPLE=$(pwd)/TensorRT-LLM/examples/llama
MODEL_SRC=$CACHE/$MODEL-hf

CKPT=$CACHE/$MODEL-ckpt
ENGINE_DIR=$CACHE/$MODEL-engine

TP_SIZE=1
TP_SIZE=${TP_SIZE:-1}

set -x

Expand All @@ -39,7 +39,8 @@ case "$1" in
trtllm-build --checkpoint_dir $CKPT \
--gemm_plugin bfloat16 \
--output_dir $ENGINE_DIR \
--use_paged_context_fmha enable
--use_paged_context_fmha enable \
--max_batch_size 128
cp $MODEL_SRC/tokenizer* $ENGINE_DIR
;;

Expand Down

0 comments on commit 6c62d42

Please sign in to comment.