Dockerfile.sdk

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# Multistage build.
#

# Base image on the minimum Triton container
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.08-py3-min

ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_CLIENT_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
ARG TRITON_ENABLE_GPU=ON
ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8

# DCGM version to install for Model Analyzer
ARG DCGM_VERSION=3.2.6

ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
ARG NVIDIA_BUILD_ID=unknown

############################################################################
##  Build image
############################################################################

FROM ${BASE_IMAGE} AS sdk_build

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
            ca-certificates \
            software-properties-common \
            autoconf \
            automake \
            build-essential \
            curl \
            git \
            gperf \
            libb64-dev \
            libgoogle-perftools-dev \
            libopencv-dev \
            libopencv-core-dev \
            libssl-dev \
            libtool \
            pkg-config \
            python3 \
            python3-pip \
            python3-dev \
            rapidjson-dev \
            vim \
            wget \
            python3-pdfkit \
            openjdk-11-jdk \
            maven && \
    pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade grpcio-tools && \
    pip3 install --upgrade pip

# Client build requires recent version of CMake (FetchContent required)
# Using CMAKE installation instruction from:: https://apt.kitware.com/
RUN apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - |  tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=3.27.7* cmake-data=3.27.7* \
    && cmake --version

# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
    ln -s /usr/bin/python3 /usr/bin/python

# Build the client library and examples
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_CLIENT_REPO_SUBDIR
ARG TRITON_PA_REPO_SUBDIR
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_CLIENT_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_ENABLE_GPU
ARG JAVA_BINDINGS_MAVEN_VERSION
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
ARG TARGETPLATFORM

WORKDIR /workspace
COPY TRITON_VERSION .
COPY ${TRITON_CLIENT_REPO_SUBDIR} client
COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer

WORKDIR /workspace/client_build
RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
          -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
          -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
          -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
          -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
          -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
          -DTRITON_ENABLE_PERF_ANALYZER=OFF \
          -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
          -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
          -DTRITON_ENABLE_JAVA_HTTP=ON \
          -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
          -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
RUN make -j16 cc-clients java-clients && \
    rm -fr ~/.m2

# TODO: PA will rebuild the CC clients since it depends on it.
# This should be optimized so that we do not have to build
# the CC clients twice. Similarly, because the SDK expectation is
# that PA is packaged with the python client, we hold off on building
# the python client until now. Post-migration we should focus
# effort on de-tangling these flows.
WORKDIR /workspace/pa_build
RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
          -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
          -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
          -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
          -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
          -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
          -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
          -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
          -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
          -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
          -DTRITON_ENABLE_CC_HTTP=ON \
          -DTRITON_ENABLE_CC_GRPC=ON \
          -DTRITON_ENABLE_PYTHON_HTTP=ON \
          -DTRITON_ENABLE_PYTHON_GRPC=ON \
          -DTRITON_PACKAGE_PERF_ANALYZER=ON \
          -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
          /workspace/perf_analyzer
RUN make -j16 perf-analyzer python-clients

RUN pip3 install build \
    && cd /workspace/perf_analyzer/genai-perf \
    && python3 -m build --wheel --outdir /workspace/install/python

# Install Java API Bindings
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
        source /workspace/client/src/java-api-bindings/scripts/install_dependencies_and_build.sh \
        --maven-version ${JAVA_BINDINGS_MAVEN_VERSION} \
        --core-tag ${TRITON_CORE_REPO_TAG} \
        --javacpp-tag ${JAVA_BINDINGS_JAVACPP_PRESETS_TAG} \
        --jar-install-path /workspace/install/java-api-bindings; \
    fi

############################################################################
## Create sdk container
############################################################################
FROM ${BASE_IMAGE}

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive

ARG DCGM_VERSION
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_CORE_REPO_TAG
ARG TARGETPLATFORM
ARG TRITON_ENABLE_GPU

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
            software-properties-common \
            curl \
            git \
            gperf \
            libb64-dev \
            libgoogle-perftools-dev \
            libopencv-dev \
            libopencv-core-dev \
            libssl-dev \
            libtool \
            python3 \
            python3-pip \
            python3-dev \
            vim \
            wget \
            python3-pdfkit \
            maven \
            default-jdk && \
    pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade grpcio-tools && \
    pip3 install --upgrade pip

WORKDIR /workspace
COPY TRITON_VERSION .
COPY NVIDIA_Deep_Learning_Container_License.pdf .
COPY --from=sdk_build /workspace/client/ client/
COPY --from=sdk_build /workspace/perf_analyzer/ perf_analyzer/
COPY --from=sdk_build /workspace/install/ install/
RUN cd install && \
    export VERSION=`cat /workspace/TRITON_VERSION` && \
    tar zcf /workspace/v$VERSION.clients.tar.gz *

# For CI testing need to copy over L0_sdk test and L0_client_build_variants test.
RUN mkdir qa
COPY qa/L0_sdk qa/L0_sdk
COPY qa/L0_client_build_variants qa/L0_client_build_variants

# Create a directory for all the python client tests to enable unit testing
RUN mkdir -p qa/python_client_unit_tests/
COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_client_unit_tests/

# Install an image needed by the quickstart and other documentation.
COPY qa/images/mug.jpg images/mug.jpg

# Install the dependencies needed to run the client examples. These
# are not needed for building but including them allows this image to
# be used to run the client examples.
RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
    find install/python/ -maxdepth 1 -type f -name \
         "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
    xargs pip3 install --upgrade

RUN pip3 install install/python/genai_perf-*.whl

# Install DCGM
RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
        [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \
        curl -o /tmp/cuda-keyring.deb \
        https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.0-1_all.deb \
        && apt install /tmp/cuda-keyring.deb && rm /tmp/cuda-keyring.deb && \
        apt-get update && apt-get install -y datacenter-gpu-manager=1:${DCGM_VERSION}; \
    fi

# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
    ln -s /usr/bin/python3 /usr/bin/python

# Install Model Analyzer
ARG TRITON_MODEL_ANALYZER_REPO_TAG
ARG TRITON_MODEL_ANALYZER_REPO="${TRITON_REPO_ORGANIZATION}/model_analyzer@${TRITON_MODEL_ANALYZER_REPO_TAG}"
RUN pip3 install "git+${TRITON_MODEL_ANALYZER_REPO}"

# Entrypoint Banner
ENV NVIDIA_PRODUCT_NAME="Triton Server SDK"
COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
RUN sed 's/Server/Server SDK/' /opt/nvidia/entrypoint.d/10-banner.txt | \
    sed 's/^===/=======/' > /opt/nvidia/entrypoint.d/10-banner.new && \
    mv /opt/nvidia/entrypoint.d/10-banner.new /opt/nvidia/entrypoint.d/10-banner.txt

ARG NVIDIA_TRITON_SERVER_SDK_VERSION
ARG NVIDIA_BUILD_ID
ENV NVIDIA_TRITON_SERVER_SDK_VERSION=${NVIDIA_TRITON_SERVER_SDK_VERSION}
ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID}

ENV PATH /workspace/install/bin:${PATH}
ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}

# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
ENV LD_LIBRARY_PATH /opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}

# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
ENV TCMALLOC_RELEASE_RATE 200