From e8e62b44927740ee68d9c5069fb21b81581c0bb5 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Thu, 25 Jan 2024 14:08:05 -0800
Subject: [PATCH 1/5] Changes to support TorchServe on CPU

---
 1-build/Dockerfile-base-cpu      | 20 +++++++++++++++-----
 5-test/tests/benchmark.sh        | 29 ++++++++++++++---------------
 5-test/tests/benchmark_client.py | 10 ++++++++--
 build.sh                         | 15 ++++++++++++---
 config.properties                |  4 ++++
 pack.sh                          | 15 ++++++++++++---
 trace.sh                         | 27 +++++++++++++++++----------
 7 files changed, 82 insertions(+), 38 deletions(-)

diff --git a/1-build/Dockerfile-base-cpu b/1-build/Dockerfile-base-cpu
index 9bb5846..666c9d4 100644
--- a/1-build/Dockerfile-base-cpu
+++ b/1-build/Dockerfile-base-cpu
@@ -1,9 +1,19 @@
-FROM python:3.9
+ARG BASE_IMAGE=python:3.9
+
+FROM ${BASE_IMAGE}
+ARG BASE_IMAGE=python:3.9
+ARG FRAMEWORK=fastapi
 
-LABEL description="Base container for CPU models"
-
-RUN apt-get update && apt-get install -y htop dnsutils bc vim
+LABEL description="Base container for CPU models"
 
-RUN pip install torch configparser transformers
+USER root
 
-RUN echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc
\ No newline at end of file
+RUN if [ "$FRAMEWORK" = "fastapi" ]; then \
+    apt-get update && apt-get install -y htop dnsutils bc vim; \
+    pip install torch configparser transformers; \
+    echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc; \
+    else \
+    apt-get update && apt-get install -y wget; \
+    pip install configparser transformers; \
+    fi

diff --git a/5-test/tests/benchmark.sh b/5-test/tests/benchmark.sh
index 9ec1a0d..f21bce9 100755
--- a/5-test/tests/benchmark.sh
+++ b/5-test/tests/benchmark.sh
@@ -5,27 +5,26 @@
 # SPDX-License-Identifier: MIT-0                                     #
 ######################################################################
 
-if [ "$num_servers" == "" ]; then
+echo "Configuring number of model servers from config.properties ..."
 
-    echo "Configuring number of model servers from config.properties ..."
-
-    if [ -f ../config.properties ]; then
-        source ../config.properties
-    elif [ -f ../../config.properties ]; then
-        source ../../config.properties
-    elif [ -f ./config.properties ]; then
-        source ./config.properties
-    else
-        echo "config.properties not found!"
-    fi
+if [ -f ../config.properties ]; then
+    source ../config.properties
+elif [ -f ../../config.properties ]; then
+    source ../../config.properties
+elif [ -f ./config.properties ]; then
+    source ./config.properties
 else
-    echo "Number of model servers ($num_servers) configured from environment ..."
+    echo "config.properties not found!"
 fi
+
+echo "Number of model servers ($num_servers) configured from environment ..."
+
+
 
 if [ "$runtime" == "docker" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
 elif [ "$runtime" == "kubernetes" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
 else
     echo "Runtime $runtime not recognized"
 fi

diff --git a/5-test/tests/benchmark_client.py b/5-test/tests/benchmark_client.py
index 9223b6a..a205aaa 100644
--- a/5-test/tests/benchmark_client.py
+++ b/5-test/tests/benchmark_client.py
@@ -39,6 +39,8 @@
 parser.add_argument('--post', default=False, action='store_true')
 parser.add_argument('--verbose', default=False, action='store_true')
 parser.add_argument('--cache_dns', default=False, action='store_true')
+parser.add_argument('--framework', help='Server framework', type=str,
+                    default=f'fastapi')
 
 args, leftovers = parser.parse_known_args()
 
@@ -51,8 +53,12 @@
 if is_multi_model_per_instance:
     n_model_per_instance = args.n_model_per_instance
 
-data = {'seq_0': "how many chapters the book has?",
-        'seq_1': """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
+if args.framework == "fastapi":
+    data = {"seq_0": "how many chapters the book has?",
+            "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
+elif args.framework == "torchserve":
+    data = "Bloomberg has decided to publish a new report on global economic situation."
+    args.post = True
 
 live = True
 num_infer = 0
 latency_list = []

diff --git a/build.sh b/build.sh
index c79b11a..8b186ba 100755
--- a/build.sh
+++ b/build.sh
@@ -5,6 +5,9 @@
 # SPDX-License-Identifier: MIT-0                                     #
 ######################################################################
 
+BASE_IMAGE=python:3.9
+FRAMEWORK=fastapi
+
 print_help() {
     echo ""
     echo "Usage: $0 [arg]"
@@ -24,14 +27,20 @@
 action=$1
 
 if [ "$action" == "" ]; then
     source ./config.properties
+    if [ "$framework" == "torchserve" ]
+    then
+        BASE_IMAGE=pytorch/torchserve:latest-cpu
+        FRAMEWORK=torchserve
+    fi
+
     echo ""
-    echo "Building base container ..."
+    echo "Building base container ... "
     echo ""
     dockerfile=./1-build/Dockerfile-base-${processor}
     if [ -f $dockerfile ]; then
-        echo " ... base-${processor} ..."
-        docker build -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
+        echo " ... base-${processor} ... "
+        docker build --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg FRAMEWORK="${FRAMEWORK}" -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
     else
         echo "Dockerfile $dockerfile was not found."
echo "Please ensure that processor is configured with a supported value in config.properties" diff --git a/config.properties b/config.properties index 9d1c989..7ce6042 100644 --- a/config.properties +++ b/config.properties @@ -8,6 +8,10 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### +# Model Serving Framework settings +# framework = fastapi|torchserve +framework=fastapi + # Model settings huggingface_model_name=bert-base-multilingual-cased huggingface_tokenizer_class=BertTokenizer diff --git a/pack.sh b/pack.sh index c82a103..e57ca0a 100755 --- a/pack.sh +++ b/pack.sh @@ -27,9 +27,18 @@ action=$1 if [ "$action" == "" ]; then model_file_name=${huggingface_model_name}_bs${batch_size}_seq${sequence_length}_pc${pipeline_cores}_${processor}.pt - docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ - --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ - -f 3-pack/Dockerfile . + if [ "$framework" == "torchserve" ] + then + docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ + --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ + -f 3-pack/Dockerfile.torchserve . + fi + if [ "$framework" == "fastapi" ] + then + docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ + --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ + -f 3-pack/Dockerfile . + fi elif [ "$action" == "push" ]; then ./3-pack/push.sh elif [ "$action" == "pull" ]; then diff --git a/trace.sh b/trace.sh index 4decc5a..6f89260 100755 --- a/trace.sh +++ b/trace.sh @@ -5,6 +5,8 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### +FRAMEWORK=fastapi + print_help() { echo "" echo "Usage: $0 " @@ -19,17 +21,22 @@ print_help() { if [ "$1" == "" ]; then source ./config.properties echo "" - echo "Tracing model: $huggingface_model_name ..." + if [ "$framework" == "torchserve" ] + then + echo "Skipping Tracing model: $huggingface_model_name for TorchServe..." + else + echo "Tracing model: $huggingface_model_name ..." - dockerfile=./1-build/Dockerfile-base-${processor} - echo "" - if [ -f $dockerfile ]; then - echo " ... for processor: $processor ..." - trace_opts=trace_opts_${processor} - docker run ${!trace_opts} -it --rm -v $(pwd)/2-trace:/app/trace -v $(pwd)/config.properties:/app/config.properties ${registry}${base_image_name}${base_image_tag} bash -c "cd /app/trace; python model-tracer.py" - else - echo "Processor $processor is not supported. Please ensure the processor setting in config.properties is configured properly" - exit 1 + dockerfile=./1-build/Dockerfile-base-${processor} + echo "" + if [ -f $dockerfile ]; then + echo " ... for processor: $processor ..." + trace_opts=trace_opts_${processor} + docker run ${!trace_opts} -it --rm -v $(pwd)/2-trace:/app/trace -v $(pwd)/config.properties:/app/config.properties ${registry}${base_image_name}${base_image_tag} bash -c "cd /app/trace; python model-tracer.py" + else + echo "Processor $processor is not supported. 
+            exit 1
+        fi
     fi
 else
     print_help

From b55acc129c8268b3a3d9c941f8dd6ac7934a0341 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Thu, 25 Jan 2024 14:12:16 -0800
Subject: [PATCH 2/5] Changes to support TorchServe on CPU

---
 3-pack/Dockerfile.torchserve | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 3-pack/Dockerfile.torchserve

diff --git a/3-pack/Dockerfile.torchserve b/3-pack/Dockerfile.torchserve
new file mode 100644
index 0000000..6080472
--- /dev/null
+++ b/3-pack/Dockerfile.torchserve
@@ -0,0 +1,20 @@
+ARG BASE_IMAGE
+
+FROM $BASE_IMAGE
+
+ARG MODEL_NAME
+ARG MODEL_FILE_NAME
+ARG PROCESSOR
+
+
+LABEL description="Model $MODEL_NAME packed in a TorchServe container to run on $PROCESSOR"
+
+RUN wget https://torchserve.pytorch.org/mar_files/bert_seqc_without_torchscript.mar -O /home/model-server/model-store/BERTSC.mar
+
+WORKDIR /home/model-server
+
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+
+CMD ["serve"]
+
+CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--models", "model0=BERTSC.mar"]
\ No newline at end of file

From 22ab3452f13b3fd351b901819b292f4543bd9ca2 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Thu, 25 Jan 2024 15:22:45 -0800
Subject: [PATCH 3/5] Changed framework to model_server

---
 1-build/Dockerfile-base-cpu      | 4 ++--
 5-test/tests/benchmark.sh        | 4 ++--
 5-test/tests/benchmark_client.py | 6 +++---
 build.sh                         | 8 ++++----
 config.properties                | 6 +++---
 pack.sh                          | 4 ++--
 trace.sh                         | 4 ++--
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/1-build/Dockerfile-base-cpu b/1-build/Dockerfile-base-cpu
index 666c9d4..9303573 100644
--- a/1-build/Dockerfile-base-cpu
+++ b/1-build/Dockerfile-base-cpu
@@ -2,14 +2,14 @@ ARG BASE_IMAGE=python:3.9
 
 FROM ${BASE_IMAGE}
 ARG BASE_IMAGE=python:3.9
-ARG FRAMEWORK=fastapi
+ARG MODEL_SERVER=fastapi
 
 
 LABEL description="Base container for CPU models"
 
 USER root
 
-RUN if [ "$FRAMEWORK" = "fastapi" ]; then \
+RUN if [ "$MODEL_SERVER" = "fastapi" ]; then \
     apt-get update && apt-get install -y htop dnsutils bc vim; \
     pip install torch configparser transformers; \
     echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc; \

diff --git a/5-test/tests/benchmark.sh b/5-test/tests/benchmark.sh
index f21bce9..d557323 100755
--- a/5-test/tests/benchmark.sh
+++ b/5-test/tests/benchmark.sh
@@ -22,9 +22,9 @@
 fi
 
 if [ "$runtime" == "docker" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --model_server ${model_server}
 elif [ "$runtime" == "kubernetes" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --model_server ${model_server}
 else
     echo "Runtime $runtime not recognized"
 fi

diff --git a/5-test/tests/benchmark_client.py b/5-test/tests/benchmark_client.py
index a205aaa..8bbaff9 100644
--- a/5-test/tests/benchmark_client.py
+++ b/5-test/tests/benchmark_client.py
@@ -39,7 +39,7 @@
 parser.add_argument('--post', default=False, action='store_true')
 parser.add_argument('--verbose', default=False, action='store_true')
 parser.add_argument('--cache_dns', default=False, action='store_true')
-parser.add_argument('--framework', help='Server framework', type=str,
+parser.add_argument('--model_server', help='Model Server', type=str,
                     default=f'fastapi')
 
 args, leftovers = parser.parse_known_args()
 
@@ -53,10 +53,10 @@
 if is_multi_model_per_instance:
     n_model_per_instance = args.n_model_per_instance
 
-if args.framework == "fastapi":
+if args.model_server == "fastapi":
     data = {"seq_0": "how many chapters the book has?",
             "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
-elif args.framework == "torchserve":
+elif args.model_server == "torchserve":
     data = "Bloomberg has decided to publish a new report on global economic situation."
     args.post = True
 
 live = True

diff --git a/build.sh b/build.sh
index 8b186ba..d5cf9d8 100755
--- a/build.sh
+++ b/build.sh
@@ -6,7 +6,7 @@
 ######################################################################
 
 BASE_IMAGE=python:3.9
-FRAMEWORK=fastapi
+MODEL_SERVER=fastapi
 
 print_help() {
     echo ""
@@ -27,10 +27,10 @@
 action=$1
 
 if [ "$action" == "" ]; then
     source ./config.properties
-    if [ "$framework" == "torchserve" ]
+    if [ "$model_server" == "torchserve" ]
     then
         BASE_IMAGE=pytorch/torchserve:latest-cpu
-        FRAMEWORK=torchserve
+        MODEL_SERVER=torchserve
     fi
 
     echo ""
@@ -40,7 +40,7 @@
     dockerfile=./1-build/Dockerfile-base-${processor}
     if [ -f $dockerfile ]; then
         echo " ... base-${processor} ... "
-        docker build --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg FRAMEWORK="${FRAMEWORK}" -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
+        docker build --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg MODEL_SERVER="${MODEL_SERVER}" -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
     else
         echo "Dockerfile $dockerfile was not found."
echo "Please ensure that processor is configured with a supported value in config.properties" diff --git a/config.properties b/config.properties index 7ce6042..43b0c9a 100644 --- a/config.properties +++ b/config.properties @@ -8,9 +8,9 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### -# Model Serving Framework settings -# framework = fastapi|torchserve -framework=fastapi +# Model Server Config +# model_server = fastapi|torchserve +model_server=fastapi # Model settings huggingface_model_name=bert-base-multilingual-cased diff --git a/pack.sh b/pack.sh index e57ca0a..6197d44 100755 --- a/pack.sh +++ b/pack.sh @@ -27,13 +27,13 @@ action=$1 if [ "$action" == "" ]; then model_file_name=${huggingface_model_name}_bs${batch_size}_seq${sequence_length}_pc${pipeline_cores}_${processor}.pt - if [ "$framework" == "torchserve" ] + if [ "$model_server" == "torchserve" ] then docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ -f 3-pack/Dockerfile.torchserve . fi - if [ "$framework" == "fastapi" ] + if [ "$model_server" == "fastapi" ] then docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ diff --git a/trace.sh b/trace.sh index 6f89260..e66dddd 100755 --- a/trace.sh +++ b/trace.sh @@ -5,7 +5,7 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### -FRAMEWORK=fastapi +MODEL_SERVER=fastapi print_help() { echo "" @@ -21,7 +21,7 @@ print_help() { if [ "$1" == "" ]; then source ./config.properties echo "" - if [ "$framework" == "torchserve" ] + if [ "$model_server" == "torchserve" ] then echo "Skipping Tracing model: $huggingface_model_name for TorchServe..." 
     else

From db4197d6ee90cb2e33baa5c676ce784671c99e32 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Mon, 8 Apr 2024 18:07:01 -0700
Subject: [PATCH 4/5] Changes to support GPU

---
 1-build/Dockerfile-base-cpu      | 10 +++++----
 1-build/Dockerfile-base-gpu      | 35 ++++++++++++++++++--------------
 3-pack/Dockerfile.torchserve     | 13 ++++++------
 5-test/tests/benchmark_client.py |  8 +++-----
 build.sh                         |  2 +-
 5 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/1-build/Dockerfile-base-cpu b/1-build/Dockerfile-base-cpu
index 9303573..a8945e4 100644
--- a/1-build/Dockerfile-base-cpu
+++ b/1-build/Dockerfile-base-cpu
@@ -9,11 +9,13 @@
 LABEL description="Base container for CPU models"
 
 USER root
 
+RUN apt-get update && apt-get install -y htop dnsutils bc vim
+
+RUN pip install configparser
+
 RUN if [ "$MODEL_SERVER" = "fastapi" ]; then \
-    apt-get update && apt-get install -y htop dnsutils bc vim; \
-    pip install torch configparser transformers; \
+    pip install torch transformers; \
     echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc; \
     else \
-    apt-get update && apt-get install -y wget; \
-    pip install configparser transformers; \
+    apt-get update && apt-get install -y curl; \
     fi

diff --git a/1-build/Dockerfile-base-gpu b/1-build/Dockerfile-base-gpu
index 2f0dcbf..1e4c96c 100644
--- a/1-build/Dockerfile-base-gpu
+++ b/1-build/Dockerfile-base-gpu
@@ -1,20 +1,25 @@
-FROM nvidia/cuda:11.1.1-runtime-ubuntu20.04
+ARG BASE_IMAGE=nvidia/cuda:11.1.1-runtime-ubuntu20.04
+FROM ${BASE_IMAGE}
+ARG BASE_IMAGE=nvidia/cuda:11.1.1-runtime-ubuntu20.04
+ARG MODEL_SERVER=fastapi
 
 LABEL description="Base container for GPU models"
 
-RUN apt-get update && apt-get install -y htop vim wget curl software-properties-common debconf-utils python3-distutils dnsutils bc
+USER root
 
-# Install python3.9
-RUN DEBIAN_FRONTEND=noninteractive; add-apt-repository -y ppa:deadsnakes/ppa; apt install -y python3.9; update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
+RUN apt-get update && apt-get install -y htop dnsutils bc vim curl
+RUN pip install configparser
 
-# Install pip
-RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py; rm -f get-pip.py
-
-# Install pytorch with GPU support
-RUN pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
-
-RUN echo "PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> /etc/environment
-RUN echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> /etc/environment
-
-# Install other python libraries
-RUN pip install transformers configparser
+RUN if [ "$MODEL_SERVER" = "fastapi" ]; then \
+    apt-get update && apt-get install -y wget software-properties-common debconf-utils python3-distutils ; \
+    # Install python3.9
+    DEBIAN_FRONTEND=noninteractive; add-apt-repository -y ppa:deadsnakes/ppa; apt install -y python3.9; update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1;\
+    # Install pip
+    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py; rm -f get-pip.py; \
+    # Install pytorch with GPU support
+    pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html; \
+    echo "PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> /etc/environment; \
+    echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> /etc/environment; \
+    # Install other python libraries
+    pip install transformers ; \
+    fi
diff --git a/3-pack/Dockerfile.torchserve b/3-pack/Dockerfile.torchserve
index 6080472..3779b78 100644
--- a/3-pack/Dockerfile.torchserve
+++ b/3-pack/Dockerfile.torchserve
@@ -1,5 +1,4 @@
 ARG BASE_IMAGE
-
 FROM $BASE_IMAGE
 
 ARG MODEL_NAME
@@ -9,12 +8,14 @@
 
 LABEL description="Model $MODEL_NAME packed in a TorchServe container to run on $PROCESSOR"
 
-RUN wget https://torchserve.pytorch.org/mar_files/bert_seqc_without_torchscript.mar -O /home/model-server/model-store/BERTSC.mar
-
 WORKDIR /home/model-server
 
-ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+COPY 3-pack/torchserve torchserve
+
+WORKDIR /home/model-server/torchserve
+USER root
+COPY 3-pack/torchserve/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
 
-CMD ["serve"]
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
+    && chown -R model-server /home/model-server
 
-CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--models", "model0=BERTSC.mar"]
\ No newline at end of file

diff --git a/5-test/tests/benchmark_client.py b/5-test/tests/benchmark_client.py
index 8bbaff9..917c4ac 100644
--- a/5-test/tests/benchmark_client.py
+++ b/5-test/tests/benchmark_client.py
@@ -53,11 +53,9 @@
 if is_multi_model_per_instance:
     n_model_per_instance = args.n_model_per_instance
 
-if args.model_server == "fastapi":
-    data = {"seq_0": "how many chapters the book has?",
-            "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
-elif args.model_server == "torchserve":
-    data = "Bloomberg has decided to publish a new report on global economic situation."
+data = {"seq_0": "how many chapters the book has?",
+        "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
+if args.model_server == "torchserve":
     args.post = True
 
 live = True
 num_infer = 0

diff --git a/build.sh b/build.sh
index d5cf9d8..f9514b5 100755
--- a/build.sh
+++ b/build.sh
@@ -29,7 +29,7 @@
 
     if [ "$model_server" == "torchserve" ]
     then
-        BASE_IMAGE=pytorch/torchserve:latest-cpu
+        BASE_IMAGE=pytorch/torchserve:latest-${processor}
         MODEL_SERVER=torchserve
     fi
 

From 2379e4f59a05b109ccf2a6c2355f6b97fd8907c0 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Mon, 8 Apr 2024 18:12:51 -0700
Subject: [PATCH 5/5] Changes to build MAR file dynamically

---
 3-pack/torchserve/dockerd-entrypoint.sh |  17 +++
 3-pack/torchserve/download_model.py     | 116 ++++++++++++++++
 3-pack/torchserve/handler.py            | 167 ++++++++++++++++++++++++
 3-pack/torchserve/model-config.yaml     |   6 +
 3-pack/torchserve/requirements.txt      |   2 +
 3-pack/torchserve/setup_config.json     |   7 +
 6 files changed, 315 insertions(+)
 create mode 100644 3-pack/torchserve/dockerd-entrypoint.sh
 create mode 100644 3-pack/torchserve/download_model.py
 create mode 100644 3-pack/torchserve/handler.py
 create mode 100644 3-pack/torchserve/model-config.yaml
 create mode 100644 3-pack/torchserve/requirements.txt
 create mode 100644 3-pack/torchserve/setup_config.json

diff --git a/3-pack/torchserve/dockerd-entrypoint.sh b/3-pack/torchserve/dockerd-entrypoint.sh
new file mode 100644
index 0000000..325005c
--- /dev/null
+++ b/3-pack/torchserve/dockerd-entrypoint.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+
+if [[ "$1" = "serve" ]]; then
+    shift 1
+
+    pip install -r requirements.txt
+    python download_model.py
+    torch-model-archiver --model-name BERTQA --version 1.0 --handler handler.py --config-file model-config.yaml --extra-files "./setup_config.json" --archive-format no-archive --export-path /home/model-server/model-store -f
+    mv Transformer_model /home/model-server/model-store/BERTQA/
+    torchserve --start --ts-config /home/model-server/config.properties --models model0=BERTQA
+else
+    eval "$@"
+fi
+
+# prevent docker exit
+tail -f /dev/null

diff --git a/3-pack/torchserve/download_model.py b/3-pack/torchserve/download_model.py
new file mode 100644
index 0000000..546fd5e
--- /dev/null
+++ b/3-pack/torchserve/download_model.py
@@ -0,0 +1,116 @@
+import json
+import os
+import sys
+
+import torch
+import transformers
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoModelForQuestionAnswering,
+    AutoModelForSequenceClassification,
+    AutoModelForTokenClassification,
+    AutoTokenizer,
+    set_seed,
+)
+
+print("Transformers version", transformers.__version__)
+set_seed(1)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def transformers_model_downloader(
+    mode,
+    pretrained_model_name,
+    do_lower_case,
+    max_length,
+    torchscript,
+    hardware,
+    batch_size,
+):
+    """This function saves the checkpoint and config file, along with the
+    tokenizer config and vocab files, of a transformer model of your choice.
+    """
+    print("Download model and tokenizer", pretrained_model_name)
+    # loading pre-trained model and tokenizer
+    config = AutoConfig.from_pretrained(
+        pretrained_model_name, torchscript=torchscript
+    )
+    model = AutoModelForQuestionAnswering.from_pretrained(
+        pretrained_model_name, config=config
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        pretrained_model_name, do_lower_case=do_lower_case
+    )
+
+    NEW_DIR = "./Transformer_model"
+    try:
+        os.mkdir(NEW_DIR)
+    except OSError:
+        print("Creation of directory %s failed" % NEW_DIR)
+    else:
+        print("Successfully created directory %s " % NEW_DIR)
+
+    print(
+        "Save model and tokenizer/ Torchscript model based on the setting from setup_config",
+        pretrained_model_name,
+        "in directory",
+        NEW_DIR,
+    )
+    if save_mode == "pretrained":
+        model.save_pretrained(NEW_DIR)
+        tokenizer.save_pretrained(NEW_DIR)
+    elif save_mode == "torchscript":
+        dummy_input = "This is a dummy input for torch jit trace"
+        question = "What does the little engine say?"
+
+        context = """In the childrens story about the little engine a small locomotive is pulling a large load up a mountain.
+        Since the load is heavy and the engine is small it is not sure whether it will be able to do the job. This is a story
+        about how an optimistic attitude empowers everyone to achieve more. In the story the little engine says: 'I think I can' as it is
+        pulling the heavy load all the way to the top of the mountain. On the way down it says: I thought I could."""
+        inputs = tokenizer.encode_plus(
+            question,
+            context,
+            max_length=int(max_length),
+            padding='max_length',
+            add_special_tokens=True,
+            return_tensors="pt",
+            truncation=True
+        )
+        model.to(device).eval()
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs["attention_mask"].to(device)
+        traced_model = torch.jit.trace(model, (input_ids, attention_mask))
+        torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
+    return
+
+
+if __name__ == "__main__":
+    dirname = os.path.dirname(__file__)
+    if len(sys.argv) > 1:
+        filename = os.path.join(dirname, sys.argv[1])
+    else:
+        filename = os.path.join(dirname, "setup_config.json")
+    f = open(filename)
+    settings = json.load(f)
+    mode = settings["mode"]
+    model_name = settings["model_name"]
+    do_lower_case = settings["do_lower_case"]
+    max_length = settings["max_length"]
+    save_mode = settings["save_mode"]
+    if save_mode == "torchscript":
+        torchscript = True
+    else:
+        torchscript = False
+    hardware = settings.get("hardware")
+    batch_size = int(settings.get("batch_size", "1"))
+
+    transformers_model_downloader(
+        mode,
+        model_name,
+        do_lower_case,
+        max_length,
+        torchscript,
+        hardware,
+        batch_size,
+    )

diff --git a/3-pack/torchserve/handler.py b/3-pack/torchserve/handler.py
new file mode 100644
index 0000000..5f92e36
--- /dev/null
+++ b/3-pack/torchserve/handler.py
@@ -0,0 +1,167 @@
+import ast
+import json
+import logging
+import os
+
+import torch
+import transformers
+from transformers import (
+    AutoModelForQuestionAnswering,
+    AutoTokenizer,
+)
+from optimum.bettertransformer import BetterTransformer
+
+from ts.torch_handler.base_handler import BaseHandler
+
+logger = logging.getLogger(__name__)
+logger.info("Transformers version %s", transformers.__version__)
+
+
+class TransformersSeqClassifierHandler(BaseHandler):
+    """
+    Transformers handler class for sequence, token classification and question answering.
+    """
+
+    def __init__(self):
+        super(TransformersSeqClassifierHandler, self).__init__()
+        self.initialized = False
+
+    def initialize(self, ctx):
+        """In this initialize function, the BERT model is loaded and
+        the Layer Integrated Gradients Algorithm for Captum Explanations
+        is initialized here.
+        Args:
+            ctx (context): It is a JSON Object containing information
+            pertaining to the model artifacts parameters.
+        """
+        self.manifest = ctx.manifest
+        properties = ctx.system_properties
+        model_dir = properties.get("model_dir")
+        model_weights_dir = ctx.model_yaml_config["handler"]["model_dir"]
+
+        self.device = torch.device(
+            "cuda:" + str(properties.get("gpu_id"))
+            if torch.cuda.is_available() and properties.get("gpu_id") is not None
+            else "cpu"
+        )
+        # read configs for the mode, model_name, etc. from setup_config.json
+        setup_config_path = os.path.join(model_dir, "setup_config.json")
+        if os.path.isfile(setup_config_path):
+            with open(setup_config_path) as setup_config_file:
+                self.setup_config = json.load(setup_config_file)
+        else:
+            logger.warning("Missing the setup_config.json file.")
+
+        # Loading the model and tokenizer from checkpoint and config files based on the user's choice of mode
+        # further setup config can be added.
+        if self.setup_config["save_mode"] == "torchscript":
+            serialized_file = "traced_model.pt"
+            model_pt_path = os.path.join(model_weights_dir, serialized_file)
+            self.model = torch.jit.load(model_pt_path, map_location=self.device)
+        elif self.setup_config["save_mode"] == "pretrained":
+            self.model = AutoModelForQuestionAnswering.from_pretrained(model_weights_dir)
+
+            try:
+                self.model = BetterTransformer.transform(self.model)
+            except RuntimeError as error:
+                logger.warning(
+                    "HuggingFace Optimum does not support this model; for the list of supported models, please refer to https://huggingface.co/docs/optimum/bettertransformer/overview"
+                )
+        self.model.to(self.device)
+
+        if self.setup_config["save_mode"] == "pretrained":
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.setup_config["model_name"],
+                do_lower_case=self.setup_config["do_lower_case"],
+            )
+        else:
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_dir,
+                do_lower_case=self.setup_config["do_lower_case"],
+            )
+
+        self.model.eval()
+        logger.info("Transformer model from path %s loaded successfully", model_dir)
+
+        self.initialized = True
+
+    def preprocess(self, requests):
+        """Basic text preprocessing, based on the user's choice of application mode.
+        Args:
+            requests (str): The Input data in the form of text is passed on to the preprocess
+            function.
+        Returns:
+            list : The preprocess function returns a list of Tensor for the size of the word tokens.
+        """
+        input_ids_batch = None
+        attention_mask_batch = None
+        logger.info(f"req: {requests}")
+        for idx, input_text in enumerate(requests):
+            max_length = self.setup_config["max_length"]
+            logger.info("Received text: '%s'", input_text)
+
+            question = input_text["seq_0"].decode("utf-8")
+            context = input_text["seq_1"].decode("utf-8")
+            logger.info(f"question: {question}")
+            logger.info(f"context: {context}")
+            inputs = self.tokenizer.encode_plus(
+                question,
+                context,
+                max_length=int(max_length),
+                padding='max_length',
+                add_special_tokens=True,
+                return_tensors="pt",
+                truncation=True
+            )
+            input_ids = inputs["input_ids"].to(self.device)
+            attention_mask = inputs["attention_mask"].to(self.device)
+            # making a batch out of the received requests
+            # attention masks are passed for cases where input tokens are padded.
+            if input_ids.shape is not None:
+                if input_ids_batch is None:
+                    input_ids_batch = input_ids
+                    attention_mask_batch = attention_mask
+                else:
+                    input_ids_batch = torch.cat((input_ids_batch, input_ids), 0)
+                    attention_mask_batch = torch.cat(
+                        (attention_mask_batch, attention_mask), 0
+                    )
+        return (input_ids_batch, attention_mask_batch)
+
+    def inference(self, input_batch):
+        """Predict the class (or classes) of the received text using the
+        serialized transformers checkpoint.
+        Args:
+            input_batch (list): List of Text Tensors from the pre-process function is passed here
+        Returns:
+            list : It returns a list of the predicted value for the input text
+        """
+        input_ids_batch, attention_mask_batch = input_batch
+        inferences = []
+        # the output should be only answer_start and answer_end
+        # we are outputting the words just for demonstration.
+        output = self.model(
+            input_ids_batch, attention_mask_batch
+        )
+        answer_text = str(output[0])
+        answer_start = torch.argmax(output[0])
+        answer_end = torch.argmax(output[1]) + 1
+        if (answer_end > answer_start):
+            answer_text = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_ids_batch[0][answer_start:answer_end]))
+        else:
+            answer_text = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_ids_batch[0][answer_start:]))
+        inferences.append(answer_text)
+        logger.info("Model predicted: '%s'", answer_text)
+
+        print("Generated text", inferences)
+        return inferences
+
+    def postprocess(self, inference_output):
+        """Post-process function that converts the predicted response into a TorchServe-readable format.
+        Args:
+            inference_output (list): It contains the predicted response of the input text.
+        Returns:
+            (list): Returns a list of the Predictions and Explanations.
+        """
+        return inference_output

diff --git a/3-pack/torchserve/model-config.yaml b/3-pack/torchserve/model-config.yaml
new file mode 100644
index 0000000..fb86520
--- /dev/null
+++ b/3-pack/torchserve/model-config.yaml
@@ -0,0 +1,6 @@
+minWorkers: 1
+maxWorkers: 1
+batchSize: 1
+responseTimeout: 240
+handler:
+  model_dir: "Transformer_model"

diff --git a/3-pack/torchserve/requirements.txt b/3-pack/torchserve/requirements.txt
new file mode 100644
index 0000000..196e970
--- /dev/null
+++ b/3-pack/torchserve/requirements.txt
@@ -0,0 +1,2 @@
+transformers
+optimum

diff --git a/3-pack/torchserve/setup_config.json b/3-pack/torchserve/setup_config.json
new file mode 100644
index 0000000..7c4597e
--- /dev/null
+++ b/3-pack/torchserve/setup_config.json
@@ -0,0 +1,7 @@
+{
+    "model_name":"bert-base-multilingual-cased",
+    "mode":"question_answering",
+    "do_lower_case":true,
+    "save_mode":"pretrained",
+    "max_length":"128"
+}
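
A minimal smoke test for the TorchServe path of this series, assuming the patches were saved as individual mail files and apply cleanly at the repository root, and that config.properties points pack.sh at a local image name. The endpoint name model0 comes from dockerd-entrypoint.sh, and the seq_0/seq_1 form fields match what handler.py reads; the image tag myregistry/model-torchserve:latest and the published port 8080 (TorchServe's default inference port) are illustrative placeholders, not values taken from the patches.

    # Apply the series and select the TorchServe model server
    git am *.patch
    sed -i 's/^model_server=.*/model_server=torchserve/' config.properties

    # Build the base image, then pack the model container (3-pack/Dockerfile.torchserve)
    ./build.sh
    ./pack.sh

    # Run the packed container and send one question-answering request;
    # the handler decodes both fields and returns the predicted answer span
    docker run -d --rm -p 8080:8080 myregistry/model-torchserve:latest
    curl -X POST http://localhost:8080/predictions/model0 \
         -F "seq_0=how many chapters the book has?" \
         -F "seq_1=The book has 42 chapters."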