update TensorRT-LLM to 0.14.0 (#5)

* update TensorRT-LLM to 0.15.0.dev2024102900
* use new apis
* install things with scripts from trt-llm
* better cmdline options
* Revert "use new apis" This reverts commit f7f4c07.
* avoid OOM (default batch size is very high)
* fix: ensure target release directory is removed recursively in clean option
* update to tensorrt-llm 0.14.0
* update llguidance
guidance-ai · Nov 1, 2024 · 6c62d42 · 6c62d42
1 parent ceb95bc
commit 6c62d42
Show file tree

Hide file tree

Showing 8 changed files with 30 additions and 17 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/TensorRT-LLM b/TensorRT-LLM
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/tensorrt:24.08-py3 AS llgtrt_dev
+FROM nvcr.io/nvidia/tensorrt:24.09-py3 AS llgtrt_dev
 
 RUN apt-get update
 RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
@@ -26,9 +26,16 @@ RUN cargo install rustfilt
 
 # install tensorrt_llm packages
 RUN mkdir -p /tmp/reqs
+
 # if this fails, run 'git submodule update --init' first
 COPY TensorRT-LLM/examples/llama/requirements.txt /tmp/reqs/
-RUN cd /tmp/reqs && pip install -r requirements.txt
+# COPY TensorRT-LLM/docker/common/install_mpi4py.sh /tmp/reqs/
+# COPY TensorRT-LLM/docker/common/install_tensorrt.sh /tmp/reqs/
+
+# RUN bash /tmp/reqs/install_mpi4py.sh
+# RUN bash /tmp/reqs/install_tensorrt.sh
+
+RUN cd /tmp/reqs && pip install --upgrade -r requirements.txt
 
 # more packages for this image
 RUN pip install \
@@ -38,7 +45,7 @@ RUN pip install \
  pandas matplotlib plotly wheel
 RUN pip uninstall -y guidance
 
-RUN pip install --upgrade transformers
+# RUN pip install --upgrade transformers
 
 RUN cd /usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs/ && \
     ln -s libnvinfer_plugin_tensorrt_llm.so libnvinfer_plugin_tensorrt_llm.so.10

diff --git a/docker/build.sh b/docker/build.sh
@@ -5,4 +5,7 @@ cd $(dirname $0)/..
 if [ ! -f TensorRT-LLM/README.md ] ; then git submodule update --init ; fi
 
 docker build --progress=plain -t llgtrt/llgtrt:dev --target llgtrt_dev . -f docker/Dockerfile
-docker build --progress=plain -t llgtrt/llgtrt:latest --target llgtrt_prod . -f docker/Dockerfile
+
+if [ "$1" != "--dev" ] ; then
+    docker build --progress=plain -t llgtrt/llgtrt:latest --target llgtrt_prod . -f docker/Dockerfile
+fi
diff --git a/llgtrt/run.sh b/llgtrt/run.sh
@@ -1,9 +1,11 @@
 #!/bin/bash
 
-# ENGINE=/root/trt-cache/Meta-Llama-3.1-8B-Instruct-engine/
-# ENGINE=/root/trt-cache/llama-8b-1tp
-# ENGINE=/root/trt-cache/engine-llama3.1-70b-4tp
-ENGINE=/root/trt-cache/llama-8b-1tp
+if [ -z "$1" ] ; then
+    ENGINE=${ENGINE:-/root/trt-cache/llama-8b-1tp}
+else
+    ENGINE="$1"
+    shift
+fi
 
 set -e
 

diff --git a/llguidance b/llguidance
diff --git a/scripts/build.sh b/scripts/build.sh
@@ -5,7 +5,7 @@ while test $# -gt 0; do
     case "$1" in
         --clean)
             rm -rf trtllm-c/build
-            rm target/release/* 2>/dev/null || :
+            rm -rf target/release/* 2>/dev/null || :
             shift
             ;;
         *)

diff --git a/scripts/trtbld.sh b/scripts/trtbld.sh
@@ -4,15 +4,15 @@ set -e
 cd $(dirname $0)/..
 SELF=./scripts/trtbld.sh
 
-CACHE=/root/trt-cache
-MODEL=Meta-Llama-3.1-8B-Instruct
+CACHE=${CACHE:-/root/trt-cache}
+MODEL=${MODEL:-Meta-Llama-3.1-8B-Instruct}
 LLAMA_EXAMPLE=$(pwd)/TensorRT-LLM/examples/llama
 MODEL_SRC=$CACHE/$MODEL-hf
 
 CKPT=$CACHE/$MODEL-ckpt
 ENGINE_DIR=$CACHE/$MODEL-engine
 
-TP_SIZE=1
+TP_SIZE=${TP_SIZE:-1}
 
 set -x
 
@@ -39,7 +39,8 @@ case "$1" in
         trtllm-build --checkpoint_dir $CKPT \
             --gemm_plugin bfloat16 \
             --output_dir $ENGINE_DIR \
-            --use_paged_context_fmha enable
+            --use_paged_context_fmha enable \
+            --max_batch_size 128
         cp $MODEL_SRC/tokenizer* $ENGINE_DIR
         ;;