From e8e62b44927740ee68d9c5069fb21b81581c0bb5 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Thu, 25 Jan 2024 14:08:05 -0800
Subject: [PATCH 1/5] Changes to support TorchServe on CPU

---
 1-build/Dockerfile-base-cpu      | 20 +++++++++++++++-----
 5-test/tests/benchmark.sh        | 29 ++++++++++++++---------------
 5-test/tests/benchmark_client.py | 10 ++++++++--
 build.sh                         | 15 ++++++++++++---
 config.properties                |  4 ++++
 pack.sh                          | 15 ++++++++++++---
 trace.sh                         | 27 +++++++++++++++++----------
 7 files changed, 82 insertions(+), 38 deletions(-)

diff --git a/1-build/Dockerfile-base-cpu b/1-build/Dockerfile-base-cpu
index 9bb5846..666c9d4 100644
--- a/1-build/Dockerfile-base-cpu
+++ b/1-build/Dockerfile-base-cpu
@@ -1,9 +1,19 @@
-FROM python:3.9
+ARG BASE_IMAGE=python:3.9
+
+FROM ${BASE_IMAGE}
+ARG BASE_IMAGE=python:3.9
+ARG FRAMEWORK=fastapi
 
-LABEL description="Base container for CPU models"
-
-RUN apt-get update && apt-get install -y htop dnsutils bc vim
+LABEL description="Base container for CPU models"
 
-RUN pip install torch configparser transformers
+USER root
 
-RUN echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc
\ No newline at end of file
+RUN if [ "$FRAMEWORK" = "fastapi" ]; then \
+    apt-get update && apt-get install -y htop dnsutils bc vim; \
+    pip install torch configparser transformers; \
+    echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc; \
+    else \
+    apt-get update && apt-get install -y wget; \
+    pip install configparser transformers; \
+    fi

diff --git a/5-test/tests/benchmark.sh b/5-test/tests/benchmark.sh
index 9ec1a0d..f21bce9 100755
--- a/5-test/tests/benchmark.sh
+++ b/5-test/tests/benchmark.sh
@@ -5,27 +5,26 @@
 # SPDX-License-Identifier: MIT-0                                     #
 ######################################################################
 
-if [ "$num_servers" == "" ]; then
+echo "Configuring number of model servers from config.properties ..."
 
-    echo "Configuring number of model servers from config.properties ..."
-
-    if [ -f ../config.properties ]; then
-        source ../config.properties
-    elif [ -f ../../config.properties ]; then
-        source ../../config.properties
-    elif [ -f ./config.properties ]; then
-        source ./config.properties
-    else
-        echo "config.properties not found!"
-    fi
+if [ -f ../config.properties ]; then
+    source ../config.properties
+elif [ -f ../../config.properties ]; then
+    source ../../config.properties
+elif [ -f ./config.properties ]; then
+    source ./config.properties
 else
-    echo "Number of model servers ($num_servers) configured from environment ..."
+    echo "config.properties not found!"
 fi
+
+echo "Number of model servers ($num_servers) configured from environment ..."
+
+
 
 if [ "$runtime" == "docker" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
 elif [ "$runtime" == "kubernetes" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
 else
     echo "Runtime $runtime not recognized"
 fi

diff --git a/5-test/tests/benchmark_client.py b/5-test/tests/benchmark_client.py
index 9223b6a..a205aaa 100644
--- a/5-test/tests/benchmark_client.py
+++ b/5-test/tests/benchmark_client.py
@@ -39,6 +39,8 @@
 parser.add_argument('--post', default=False, action='store_true')
 parser.add_argument('--verbose', default=False, action='store_true')
 parser.add_argument('--cache_dns', default=False, action='store_true')
+parser.add_argument('--framework', help='Server framework', type=str,
+                    default=f'fastapi')
 
 args, leftovers = parser.parse_known_args()
 
@@ -51,8 +53,12 @@
 if is_multi_model_per_instance:
     n_model_per_instance = args.n_model_per_instance
 
-data = {'seq_0': "how many chapters the book has?",
-        'seq_1': """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
+if args.framework == "fastapi":
+    data = {"seq_0": "how many chapters the book has?",
+            "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
+elif args.framework == "torchserve":
+    data = "Bloomberg has decided to publish a new report on global economic situation."
+    args.post = True
 
 live = True
 num_infer = 0
 latency_list = []

diff --git a/build.sh b/build.sh
index c79b11a..8b186ba 100755
--- a/build.sh
+++ b/build.sh
@@ -5,6 +5,9 @@
 # SPDX-License-Identifier: MIT-0                                     #
 ######################################################################
 
+BASE_IMAGE=python:3.9
+FRAMEWORK=fastapi
+
 print_help() {
     echo ""
     echo "Usage: $0 [arg]"
@@ -24,14 +27,20 @@
 action=$1
 
 if [ "$action" == "" ]; then
     source ./config.properties
+    if [ "$framework" == "torchserve" ]
+    then
+        BASE_IMAGE=pytorch/torchserve:latest-cpu
+        FRAMEWORK=torchserve
+    fi
+
     echo ""
-    echo "Building base container ..."
+    echo "Building base container ... "
     echo ""
     dockerfile=./1-build/Dockerfile-base-${processor}
     if [ -f $dockerfile ]; then
-        echo " ... base-${processor} ..."
-        docker build -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
+        echo " ... base-${processor} ... "
+        docker build --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg FRAMEWORK="${FRAMEWORK}" -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
     else
         echo "Dockerfile $dockerfile was not found."
echo "Please ensure that processor is configured with a supported value in config.properties" diff --git a/config.properties b/config.properties index 9d1c989..7ce6042 100644 --- a/config.properties +++ b/config.properties @@ -8,6 +8,10 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### +# Model Serving Framework settings +# framework = fastapi|torchserve +framework=fastapi + # Model settings huggingface_model_name=bert-base-multilingual-cased huggingface_tokenizer_class=BertTokenizer diff --git a/pack.sh b/pack.sh index c82a103..e57ca0a 100755 --- a/pack.sh +++ b/pack.sh @@ -27,9 +27,18 @@ action=$1 if [ "$action" == "" ]; then model_file_name=${huggingface_model_name}_bs${batch_size}_seq${sequence_length}_pc${pipeline_cores}_${processor}.pt - docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ - --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ - -f 3-pack/Dockerfile . + if [ "$framework" == "torchserve" ] + then + docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ + --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ + -f 3-pack/Dockerfile.torchserve . + fi + if [ "$framework" == "fastapi" ] + then + docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ + --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ + -f 3-pack/Dockerfile . + fi elif [ "$action" == "push" ]; then ./3-pack/push.sh elif [ "$action" == "pull" ]; then diff --git a/trace.sh b/trace.sh index 4decc5a..6f89260 100755 --- a/trace.sh +++ b/trace.sh @@ -5,6 +5,8 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### +FRAMEWORK=fastapi + print_help() { echo "" echo "Usage: $0 " @@ -19,17 +21,22 @@ print_help() { if [ "$1" == "" ]; then source ./config.properties echo "" - echo "Tracing model: $huggingface_model_name ..." + if [ "$framework" == "torchserve" ] + then + echo "Skipping Tracing model: $huggingface_model_name for TorchServe..." + else + echo "Tracing model: $huggingface_model_name ..." - dockerfile=./1-build/Dockerfile-base-${processor} - echo "" - if [ -f $dockerfile ]; then - echo " ... for processor: $processor ..." - trace_opts=trace_opts_${processor} - docker run ${!trace_opts} -it --rm -v $(pwd)/2-trace:/app/trace -v $(pwd)/config.properties:/app/config.properties ${registry}${base_image_name}${base_image_tag} bash -c "cd /app/trace; python model-tracer.py" - else - echo "Processor $processor is not supported. Please ensure the processor setting in config.properties is configured properly" - exit 1 + dockerfile=./1-build/Dockerfile-base-${processor} + echo "" + if [ -f $dockerfile ]; then + echo " ... for processor: $processor ..." + trace_opts=trace_opts_${processor} + docker run ${!trace_opts} -it --rm -v $(pwd)/2-trace:/app/trace -v $(pwd)/config.properties:/app/config.properties ${registry}${base_image_name}${base_image_tag} bash -c "cd /app/trace; python model-tracer.py" + else + echo "Processor $processor is not supported. 
+            exit 1
+        fi
     fi
 else
     print_help

From b55acc129c8268b3a3d9c941f8dd6ac7934a0341 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Thu, 25 Jan 2024 14:12:16 -0800
Subject: [PATCH 2/5] Changes to support TorchServe on CPU

---
 3-pack/Dockerfile.torchserve | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 3-pack/Dockerfile.torchserve

diff --git a/3-pack/Dockerfile.torchserve b/3-pack/Dockerfile.torchserve
new file mode 100644
index 0000000..6080472
--- /dev/null
+++ b/3-pack/Dockerfile.torchserve
@@ -0,0 +1,20 @@
+ARG BASE_IMAGE
+
+FROM $BASE_IMAGE
+
+ARG MODEL_NAME
+ARG MODEL_FILE_NAME
+ARG PROCESSOR
+
+
+LABEL description="Model $MODEL_NAME packed in a TorchServe container to run on $PROCESSOR"
+
+RUN wget https://torchserve.pytorch.org/mar_files/bert_seqc_without_torchscript.mar -O /home/model-server/model-store/BERTSC.mar
+
+WORKDIR /home/model-server
+
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+
+CMD ["serve"]
+
+CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--models", "model0=BERTSC.mar"]
\ No newline at end of file

From 22ab3452f13b3fd351b901819b292f4543bd9ca2 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Thu, 25 Jan 2024 15:22:45 -0800
Subject: [PATCH 3/5] Changed framework to model_server

---
 1-build/Dockerfile-base-cpu      | 4 ++--
 5-test/tests/benchmark.sh        | 4 ++--
 5-test/tests/benchmark_client.py | 6 +++---
 build.sh                         | 8 ++++----
 config.properties                | 6 +++---
 pack.sh                          | 4 ++--
 trace.sh                         | 4 ++--
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/1-build/Dockerfile-base-cpu b/1-build/Dockerfile-base-cpu
index 666c9d4..9303573 100644
--- a/1-build/Dockerfile-base-cpu
+++ b/1-build/Dockerfile-base-cpu
@@ -2,14 +2,14 @@ ARG BASE_IMAGE=python:3.9
 
 FROM ${BASE_IMAGE}
 ARG BASE_IMAGE=python:3.9
-ARG FRAMEWORK=fastapi
+ARG MODEL_SERVER=fastapi
 
 
 LABEL description="Base container for CPU models"
 
 USER root
 
-RUN if [ "$FRAMEWORK" = "fastapi" ]; then \
+RUN if [ "$MODEL_SERVER" = "fastapi" ]; then \
     apt-get update && apt-get install -y htop dnsutils bc vim; \
     pip install torch configparser transformers; \
     echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc; \

diff --git a/5-test/tests/benchmark.sh b/5-test/tests/benchmark.sh
index f21bce9..d557323 100755
--- a/5-test/tests/benchmark.sh
+++ b/5-test/tests/benchmark.sh
@@ -22,9 +22,9 @@
 fi
 
 if [ "$runtime" == "docker" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --model_server ${model_server}
 elif [ "$runtime" == "kubernetes" ]; then
-    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --framework ${framework}
+    python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns --model_server ${model_server}
 else
     echo "Runtime $runtime not recognized"
 fi

diff --git a/5-test/tests/benchmark_client.py b/5-test/tests/benchmark_client.py
index a205aaa..8bbaff9 100644
--- a/5-test/tests/benchmark_client.py
+++ b/5-test/tests/benchmark_client.py
@@ -39,7 +39,7 @@
 parser.add_argument('--post', default=False, action='store_true')
 parser.add_argument('--verbose', default=False, action='store_true')
 parser.add_argument('--cache_dns', default=False, action='store_true')
-parser.add_argument('--framework', help='Server framework', type=str,
+parser.add_argument('--model_server', help='Model Server', type=str,
                     default=f'fastapi')
 
 args, leftovers = parser.parse_known_args()
 
@@ -53,10 +53,10 @@
 if is_multi_model_per_instance:
     n_model_per_instance = args.n_model_per_instance
 
-if args.framework == "fastapi":
+if args.model_server == "fastapi":
     data = {"seq_0": "how many chapters the book has?",
             "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
-elif args.framework == "torchserve":
+elif args.model_server == "torchserve":
     data = "Bloomberg has decided to publish a new report on global economic situation."
     args.post = True
 
 live = True

diff --git a/build.sh b/build.sh
index 8b186ba..d5cf9d8 100755
--- a/build.sh
+++ b/build.sh
@@ -6,7 +6,7 @@
 ######################################################################
 
 BASE_IMAGE=python:3.9
-FRAMEWORK=fastapi
+MODEL_SERVER=fastapi
 
 print_help() {
     echo ""
@@ -27,10 +27,10 @@
 action=$1
 
 if [ "$action" == "" ]; then
     source ./config.properties
-    if [ "$framework" == "torchserve" ]
+    if [ "$model_server" == "torchserve" ]
     then
         BASE_IMAGE=pytorch/torchserve:latest-cpu
-        FRAMEWORK=torchserve
+        MODEL_SERVER=torchserve
     fi
 
     echo ""
@@ -40,7 +40,7 @@
     dockerfile=./1-build/Dockerfile-base-${processor}
     if [ -f $dockerfile ]; then
         echo " ... base-${processor} ... "
-        docker build --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg FRAMEWORK="${FRAMEWORK}" -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
+        docker build --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg MODEL_SERVER="${MODEL_SERVER}" -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile .
     else
         echo "Dockerfile $dockerfile was not found."
echo "Please ensure that processor is configured with a supported value in config.properties" diff --git a/config.properties b/config.properties index 7ce6042..43b0c9a 100644 --- a/config.properties +++ b/config.properties @@ -8,9 +8,9 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### -# Model Serving Framework settings -# framework = fastapi|torchserve -framework=fastapi +# Model Server Config +# model_server = fastapi|torchserve +model_server=fastapi # Model settings huggingface_model_name=bert-base-multilingual-cased diff --git a/pack.sh b/pack.sh index e57ca0a..6197d44 100755 --- a/pack.sh +++ b/pack.sh @@ -27,13 +27,13 @@ action=$1 if [ "$action" == "" ]; then model_file_name=${huggingface_model_name}_bs${batch_size}_seq${sequence_length}_pc${pipeline_cores}_${processor}.pt - if [ "$framework" == "torchserve" ] + if [ "$model_server" == "torchserve" ] then docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ -f 3-pack/Dockerfile.torchserve . fi - if [ "$framework" == "fastapi" ] + if [ "$model_server" == "fastapi" ] then docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \ --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \ diff --git a/trace.sh b/trace.sh index 6f89260..e66dddd 100755 --- a/trace.sh +++ b/trace.sh @@ -5,7 +5,7 @@ # SPDX-License-Identifier: MIT-0 # ###################################################################### -FRAMEWORK=fastapi +MODEL_SERVER=fastapi print_help() { echo "" @@ -21,7 +21,7 @@ print_help() { if [ "$1" == "" ]; then source ./config.properties echo "" - if [ "$framework" == "torchserve" ] + if [ "$model_server" == "torchserve" ] then echo "Skipping Tracing model: $huggingface_model_name for TorchServe..." 
     else

From db4197d6ee90cb2e33baa5c676ce784671c99e32 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Mon, 8 Apr 2024 18:07:01 -0700
Subject: [PATCH 4/5] Changes to support GPU

---
 1-build/Dockerfile-base-cpu      | 10 +++++----
 1-build/Dockerfile-base-gpu      | 35 ++++++++++++++++++--------------
 3-pack/Dockerfile.torchserve     | 13 ++++++------
 5-test/tests/benchmark_client.py |  8 +++-----
 build.sh                         |  2 +-
 5 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/1-build/Dockerfile-base-cpu b/1-build/Dockerfile-base-cpu
index 9303573..a8945e4 100644
--- a/1-build/Dockerfile-base-cpu
+++ b/1-build/Dockerfile-base-cpu
@@ -9,11 +9,13 @@
 LABEL description="Base container for CPU models"
 
 USER root
 
+RUN apt-get update && apt-get install -y htop dnsutils bc vim
+
+RUN pip install configparser
+
 RUN if [ "$MODEL_SERVER" = "fastapi" ]; then \
-    apt-get update && apt-get install -y htop dnsutils bc vim; \
-    pip install torch configparser transformers; \
+    pip install torch transformers; \
     echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc; \
     else \
-    apt-get update && apt-get install -y wget; \
-    pip install configparser transformers; \
+    apt-get update && apt-get install -y curl; \
     fi

diff --git a/1-build/Dockerfile-base-gpu b/1-build/Dockerfile-base-gpu
index 2f0dcbf..1e4c96c 100644
--- a/1-build/Dockerfile-base-gpu
+++ b/1-build/Dockerfile-base-gpu
@@ -1,20 +1,25 @@
-FROM nvidia/cuda:11.1.1-runtime-ubuntu20.04
+ARG BASE_IMAGE=nvidia/cuda:11.1.1-runtime-ubuntu20.04
+FROM ${BASE_IMAGE}
+ARG BASE_IMAGE=nvidia/cuda:11.1.1-runtime-ubuntu20.04
+ARG MODEL_SERVER=fastapi
 
 LABEL description="Base container for GPU models"
 
-RUN apt-get update && apt-get install -y htop vim wget curl software-properties-common debconf-utils python3-distutils dnsutils bc
+USER root
 
-# Install python3.9
-RUN DEBIAN_FRONTEND=noninteractive; add-apt-repository -y ppa:deadsnakes/ppa; apt install -y python3.9; update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
+RUN apt-get update && apt-get install -y htop dnsutils bc vim curl
+RUN pip install configparser
 
-# Install pip
-RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py; rm -f get-pip.py
-
-# Install pytorch with GPU support
-RUN pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
-
-RUN echo "PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> /etc/environment
-RUN echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> /etc/environment
-
-# Install other python libraries
-RUN pip install transformers configparser
+RUN if [ "$MODEL_SERVER" = "fastapi" ]; then \
+    apt-get update && apt-get install -y wget software-properties-common debconf-utils python3-distutils ; \
+    # Install python3.9
+    DEBIAN_FRONTEND=noninteractive; add-apt-repository -y ppa:deadsnakes/ppa; apt install -y python3.9; update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1;\
+    # Install pip
+    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py; rm -f get-pip.py; \
+    # Install pytorch with GPU support
+    pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html; \
+    echo "PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> /etc/environment; \
+    echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> /etc/environment; \
+    # Install other python libraries
+    pip install transformers ; \
+    fi
diff --git a/3-pack/Dockerfile.torchserve b/3-pack/Dockerfile.torchserve
index 6080472..3779b78 100644
--- a/3-pack/Dockerfile.torchserve
+++ b/3-pack/Dockerfile.torchserve
@@ -1,5 +1,4 @@
 ARG BASE_IMAGE
-
 FROM $BASE_IMAGE
 
 ARG MODEL_NAME
@@ -9,12 +8,14 @@
 
 LABEL description="Model $MODEL_NAME packed in a TorchServe container to run on $PROCESSOR"
 
-RUN wget https://torchserve.pytorch.org/mar_files/bert_seqc_without_torchscript.mar -O /home/model-server/model-store/BERTSC.mar
-
 WORKDIR /home/model-server
 
-ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+COPY 3-pack/torchserve torchserve
+
+WORKDIR /home/model-server/torchserve
+USER root
+COPY 3-pack/torchserve/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
 
-CMD ["serve"]
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
+    && chown -R model-server /home/model-server
 
-CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--models", "model0=BERTSC.mar"]
\ No newline at end of file

diff --git a/5-test/tests/benchmark_client.py b/5-test/tests/benchmark_client.py
index 8bbaff9..917c4ac 100644
--- a/5-test/tests/benchmark_client.py
+++ b/5-test/tests/benchmark_client.py
@@ -53,11 +53,9 @@
 if is_multi_model_per_instance:
     n_model_per_instance = args.n_model_per_instance
 
-if args.model_server == "fastapi":
-    data = {"seq_0": "how many chapters the book has?",
-            "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
-elif args.model_server == "torchserve":
-    data = "Bloomberg has decided to publish a new report on global economic situation."
+data = {"seq_0": "how many chapters the book has?",
+        "seq_1": """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
+if args.model_server == "torchserve":
     args.post = True
 
 live = True
 num_infer = 0

diff --git a/build.sh b/build.sh
index d5cf9d8..f9514b5 100755
--- a/build.sh
+++ b/build.sh
@@ -29,7 +29,7 @@
 
     if [ "$model_server" == "torchserve" ]
     then
-        BASE_IMAGE=pytorch/torchserve:latest-cpu
+        BASE_IMAGE=pytorch/torchserve:latest-${processor}
         MODEL_SERVER=torchserve
     fi
 

From 2379e4f59a05b109ccf2a6c2355f6b97fd8907c0 Mon Sep 17 00:00:00 2001
From: Ankith Gunapal
Date: Mon, 8 Apr 2024 18:12:51 -0700
Subject: [PATCH 5/5] Changes to build MAR file dynamically

---
 3-pack/torchserve/dockerd-entrypoint.sh |  17 +++
 3-pack/torchserve/download_model.py     | 116 ++++++++++++++++
 3-pack/torchserve/handler.py            | 167 ++++++++++++++++++++++++
 3-pack/torchserve/model-config.yaml     |   6 +
 3-pack/torchserve/requirements.txt      |   2 +
 3-pack/torchserve/setup_config.json     |   7 +
 6 files changed, 315 insertions(+)
 create mode 100644 3-pack/torchserve/dockerd-entrypoint.sh
 create mode 100644 3-pack/torchserve/download_model.py
 create mode 100644 3-pack/torchserve/handler.py
 create mode 100644 3-pack/torchserve/model-config.yaml
 create mode 100644 3-pack/torchserve/requirements.txt
 create mode 100644 3-pack/torchserve/setup_config.json

diff --git a/3-pack/torchserve/dockerd-entrypoint.sh b/3-pack/torchserve/dockerd-entrypoint.sh
new file mode 100644
index 0000000..325005c
--- /dev/null
+++ b/3-pack/torchserve/dockerd-entrypoint.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+
+if [[ "$1" = "serve" ]]; then
+    shift 1
+
+    pip install -r requirements.txt
+    python download_model.py
+    torch-model-archiver --model-name BERTQA --version 1.0 --handler handler.py --config-file model-config.yaml --extra-files "./setup_config.json" --archive-format no-archive --export-path /home/model-server/model-store -f
+    mv Transformer_model /home/model-server/model-store/BERTQA/
+    torchserve --start --ts-config /home/model-server/config.properties --models model0=BERTQA
+else
+    eval "$@"
+fi
+
+# prevent docker exit
+tail -f /dev/null

diff --git a/3-pack/torchserve/download_model.py b/3-pack/torchserve/download_model.py
new file mode 100644
index 0000000..546fd5e
--- /dev/null
+++ b/3-pack/torchserve/download_model.py
@@ -0,0 +1,116 @@
+import json
+import os
+import sys
+
+import torch
+import transformers
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoModelForQuestionAnswering,
+    AutoModelForSequenceClassification,
+    AutoModelForTokenClassification,
+    AutoTokenizer,
+    set_seed,
+)
+
+print("Transformers version", transformers.__version__)
+set_seed(1)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def transformers_model_downloader(
+    mode,
+    pretrained_model_name,
+    do_lower_case,
+    max_length,
+    torchscript,
+    hardware,
+    batch_size,
+):
+    """This function saves the checkpoint and config file, along with the
+    tokenizer config and vocab files, of a transformer model of your choice.
+    """
+    print("Download model and tokenizer", pretrained_model_name)
+    # loading pre-trained model and tokenizer
+    config = AutoConfig.from_pretrained(
+        pretrained_model_name, torchscript=torchscript
+    )
+    model = AutoModelForQuestionAnswering.from_pretrained(
+        pretrained_model_name, config=config
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        pretrained_model_name, do_lower_case=do_lower_case
+    )
+
+    NEW_DIR = "./Transformer_model"
+    try:
+        os.mkdir(NEW_DIR)
+    except OSError:
+        print("Creation of directory %s failed" % NEW_DIR)
+    else:
+        print("Successfully created directory %s " % NEW_DIR)
+
+    print(
+        "Save model and tokenizer/ Torchscript model based on the setting from setup_config",
+        pretrained_model_name,
+        "in directory",
+        NEW_DIR,
+    )
+    if save_mode == "pretrained":
+        model.save_pretrained(NEW_DIR)
+        tokenizer.save_pretrained(NEW_DIR)
+    elif save_mode == "torchscript":
+        dummy_input = "This is a dummy input for torch jit trace"
+        question = "What does the little engine say?"
+
+        context = """In the childrens story about the little engine a small locomotive is pulling a large load up a mountain.
+        Since the load is heavy and the engine is small it is not sure whether it will be able to do the job. This is a story
+        about how an optimistic attitude empowers everyone to achieve more. In the story the little engine says: 'I think I can' as it is
+        pulling the heavy load all the way to the top of the mountain. On the way down it says: I thought I could."""
+        inputs = tokenizer.encode_plus(
+            question,
+            context,
+            max_length=int(max_length),
+            padding='max_length',
+            add_special_tokens=True,
+            return_tensors="pt",
+            truncation=True
+        )
+        model.to(device).eval()
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs["attention_mask"].to(device)
+        traced_model = torch.jit.trace(model, (input_ids, attention_mask))
+        torch.jit.save(traced_model, os.path.join(NEW_DIR, "traced_model.pt"))
+    return
+
+
+if __name__ == "__main__":
+    dirname = os.path.dirname(__file__)
+    if len(sys.argv) > 1:
+        filename = os.path.join(dirname, sys.argv[1])
+    else:
+        filename = os.path.join(dirname, "setup_config.json")
+    f = open(filename)
+    settings = json.load(f)
+    mode = settings["mode"]
+    model_name = settings["model_name"]
+    do_lower_case = settings["do_lower_case"]
+    max_length = settings["max_length"]
+    save_mode = settings["save_mode"]
+    if save_mode == "torchscript":
+        torchscript = True
+    else:
+        torchscript = False
+    hardware = settings.get("hardware")
+    batch_size = int(settings.get("batch_size", "1"))
+
+    transformers_model_downloader(
+        mode,
+        model_name,
+        do_lower_case,
+        max_length,
+        torchscript,
+        hardware,
+        batch_size,
+    )

diff --git a/3-pack/torchserve/handler.py b/3-pack/torchserve/handler.py
new file mode 100644
index 0000000..5f92e36
--- /dev/null
+++ b/3-pack/torchserve/handler.py
@@ -0,0 +1,167 @@
+import ast
+import json
+import logging
+import os
+
+import torch
+import transformers
+from transformers import (
+    AutoModelForQuestionAnswering,
+    AutoTokenizer,
+)
+from optimum.bettertransformer import BetterTransformer
+
+from ts.torch_handler.base_handler import BaseHandler
+
+logger = logging.getLogger(__name__)
+logger.info("Transformers version %s", transformers.__version__)
+
+
+class TransformersSeqClassifierHandler(BaseHandler):
+    """
+    Transformers handler class for sequence, token classification and question answering.
+    """
+
+    def __init__(self):
+        super(TransformersSeqClassifierHandler, self).__init__()
+        self.initialized = False
+
+    def initialize(self, ctx):
+        """In this initialize function, the BERT model is loaded and
+        the Layer Integrated Gradients Algorithm for Captum Explanations
+        is initialized here.
+        Args:
+            ctx (context): It is a JSON Object containing information
+            pertaining to the model artifacts parameters.
+        """
+        self.manifest = ctx.manifest
+        properties = ctx.system_properties
+        model_dir = properties.get("model_dir")
+        model_weights_dir = ctx.model_yaml_config["handler"]["model_dir"]
+
+        self.device = torch.device(
+            "cuda:" + str(properties.get("gpu_id"))
+            if torch.cuda.is_available() and properties.get("gpu_id") is not None
+            else "cpu"
+        )
+        # read configs for the mode, model_name, etc. from setup_config.json
+        setup_config_path = os.path.join(model_dir, "setup_config.json")
+        if os.path.isfile(setup_config_path):
+            with open(setup_config_path) as setup_config_file:
+                self.setup_config = json.load(setup_config_file)
+        else:
+            logger.warning("Missing the setup_config.json file.")
+
+        # Loading the model and tokenizer from checkpoint and config files based on the user's choice of mode
+        # further setup config can be added.
+        if self.setup_config["save_mode"] == "torchscript":
+            serialized_file = "traced_model.pt"
+            model_pt_path = os.path.join(model_weights_dir, serialized_file)
+            self.model = torch.jit.load(model_pt_path, map_location=self.device)
+        elif self.setup_config["save_mode"] == "pretrained":
+            self.model = AutoModelForQuestionAnswering.from_pretrained(model_weights_dir)
+
+            try:
+                self.model = BetterTransformer.transform(self.model)
+            except RuntimeError as error:
+                logger.warning(
+                    "HuggingFace Optimum does not support this model; for the list of supported models, please refer to https://huggingface.co/docs/optimum/bettertransformer/overview"
+                )
+        self.model.to(self.device)
+
+        if self.setup_config["save_mode"] == "pretrained":
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.setup_config["model_name"],
+                do_lower_case=self.setup_config["do_lower_case"],
+            )
+        else:
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_dir,
+                do_lower_case=self.setup_config["do_lower_case"],
+            )
+
+        self.model.eval()
+        logger.info("Transformer model from path %s loaded successfully", model_dir)
+
+        self.initialized = True
+
+    def preprocess(self, requests):
+        """Basic text preprocessing, based on the user's choice of application mode.
+        Args:
+            requests (str): The Input data in the form of text is passed on to the preprocess
+            function.
+        Returns:
+            list : The preprocess function returns a list of Tensor for the size of the word tokens.
+        """
+        input_ids_batch = None
+        attention_mask_batch = None
+        logger.info(f"req: {requests}")
+        for idx, input_text in enumerate(requests):
+            max_length = self.setup_config["max_length"]
+            logger.info("Received text: '%s'", input_text)
+
+            question = input_text["seq_0"].decode("utf-8")
+            context = input_text["seq_1"].decode("utf-8")
+            logger.info(f"question: {question}")
+            logger.info(f"context: {context}")
+            inputs = self.tokenizer.encode_plus(
+                question,
+                context,
+                max_length=int(max_length),
+                padding='max_length',
+                add_special_tokens=True,
+                return_tensors="pt",
+                truncation=True
+            )
+            input_ids = inputs["input_ids"].to(self.device)
+            attention_mask = inputs["attention_mask"].to(self.device)
+            # making a batch out of the received requests
+            # attention masks are passed for cases where input tokens are padded.
+            if input_ids.shape is not None:
+                if input_ids_batch is None:
+                    input_ids_batch = input_ids
+                    attention_mask_batch = attention_mask
+                else:
+                    input_ids_batch = torch.cat((input_ids_batch, input_ids), 0)
+                    attention_mask_batch = torch.cat(
+                        (attention_mask_batch, attention_mask), 0
+                    )
+        return (input_ids_batch, attention_mask_batch)
+
+    def inference(self, input_batch):
+        """Predict the class (or classes) of the received text using the
+        serialized transformers checkpoint.
+        Args:
+            input_batch (list): List of Text Tensors from the pre-process function is passed here
+        Returns:
+            list : It returns a list of the predicted value for the input text
+        """
+        input_ids_batch, attention_mask_batch = input_batch
+        inferences = []
+        # the output should be only answer_start and answer_end
+        # we are outputting the words just for demonstration.
+        output = self.model(
+            input_ids_batch, attention_mask_batch
+        )
+        answer_text = str(output[0])
+        answer_start = torch.argmax(output[0])
+        answer_end = torch.argmax(output[1]) + 1
+        if (answer_end > answer_start):
+            answer_text = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_ids_batch[0][answer_start:answer_end]))
+        else:
+            answer_text = self.tokenizer.convert_tokens_to_string(self.tokenizer.convert_ids_to_tokens(input_ids_batch[0][answer_start:]))
+        inferences.append(answer_text)
+        logger.info("Model predicted: '%s'", answer_text)
+
+        print("Generated text", inferences)
+        return inferences
+
+    def postprocess(self, inference_output):
+        """Post-process function that converts the predicted response into a TorchServe-readable format.
+        Args:
+            inference_output (list): It contains the predicted response of the input text.
+        Returns:
+            (list): Returns a list of the Predictions and Explanations.
+        """
+        return inference_output

diff --git a/3-pack/torchserve/model-config.yaml b/3-pack/torchserve/model-config.yaml
new file mode 100644
index 0000000..fb86520
--- /dev/null
+++ b/3-pack/torchserve/model-config.yaml
@@ -0,0 +1,6 @@
+minWorkers: 1
+maxWorkers: 1
+batchSize: 1
+responseTimeout: 240
+handler:
+  model_dir: "Transformer_model"

diff --git a/3-pack/torchserve/requirements.txt b/3-pack/torchserve/requirements.txt
new file mode 100644
index 0000000..196e970
--- /dev/null
+++ b/3-pack/torchserve/requirements.txt
@@ -0,0 +1,2 @@
+transformers
+optimum

diff --git a/3-pack/torchserve/setup_config.json b/3-pack/torchserve/setup_config.json
new file mode 100644
index 0000000..7c4597e
--- /dev/null
+++ b/3-pack/torchserve/setup_config.json
@@ -0,0 +1,7 @@
+{
+    "model_name":"bert-base-multilingual-cased",
+    "mode":"question_answering",
+    "do_lower_case":true,
+    "save_mode":"pretrained",
+    "max_length":"128"
+}
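
A minimal smoke test for the TorchServe path of this series, assuming the patches were saved as individual mail files and apply cleanly at the repository root, and that config.properties points pack.sh at a local image name. The endpoint name model0 comes from dockerd-entrypoint.sh, and the seq_0/seq_1 form fields match what handler.py reads; the image tag myregistry/model-torchserve:latest and the published port 8080 (TorchServe's default inference port) are illustrative placeholders, not values taken from the patches.

    # Apply the series and select the TorchServe model server
    git am *.patch
    sed -i 's/^model_server=.*/model_server=torchserve/' config.properties

    # Build the base image, then pack the model container (3-pack/Dockerfile.torchserve)
    ./build.sh
    ./pack.sh

    # Run the packed container and send one question-answering request;
    # the handler decodes both fields and returns the predicted answer span
    docker run -d --rm -p 8080:8080 myregistry/model-torchserve:latest
    curl -X POST http://localhost:8080/predictions/model0 \
         -F "seq_0=how many chapters the book has?" \
         -F "seq_1=The book has 42 chapters."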