#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM nvidia/cuda:10.1-devel-ubuntu18.04
ARG spark_uid=185
# Install Java dependencies
RUN apt-get update && apt-get install -y --no-install-recommends openjdk-8-jdk openjdk-8-jre
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64
ENV PATH $PATH:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/bin:/usr/lib/jvm/java-1.8.0-openjdk-amd64/bin
# Before building the Docker image, first either download Apache Spark 3.0+ from
# http://spark.apache.org/downloads.html or build and make a Spark distribution following
# the instructions at http://spark.apache.org/docs/3.0.1/building-spark.html (3.0.0 can
# be used as well).
# If this Dockerfile is used to build your images from a Spark distribution, the
# docker build command should be invoked from the top-level directory of the Spark
# distribution. E.g.:
# docker build -t spark:3.0.1 -f kubernetes/dockerfiles/spark/Dockerfile .
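# A build invocation for this CUDA variant might look like the following sketch
# (the image tag is an assumption, not prescribed by this repo):
# docker build -t spark-rapids:0.2.0-cuda10.1 -f Dockerfile.cuda .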
RUN set -ex && \
    ln -s /lib /lib64 && \
    mkdir -p /opt/spark && \
    mkdir -p /opt/spark/jars && \
    mkdir -p /opt/tpch && \
    mkdir -p /opt/spark/examples && \
    mkdir -p /opt/spark/work-dir && \
    mkdir -p /opt/sparkRapidsPlugin && \
    touch /opt/spark/RELEASE && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
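# Note: the /etc/passwd permissions above let entrypoint.sh append a passwd
# entry at runtime when the container starts under an arbitrary non-root UID
# (e.g. OpenShift's random-UID model), so tools that resolve the current user
# keep working.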
COPY spark-3.0.1-bin-hadoop3.2/jars /opt/spark/jars
COPY spark-3.0.1-bin-hadoop3.2/bin /opt/spark/bin
COPY spark-3.0.1-bin-hadoop3.2/sbin /opt/spark/sbin
COPY spark-3.0.1-bin-hadoop3.2/kubernetes/dockerfiles/spark/entrypoint.sh /opt/
COPY spark-3.0.1-bin-hadoop3.2/examples /opt/spark/examples
COPY spark-3.0.1-bin-hadoop3.2/kubernetes/tests /opt/spark/tests
COPY spark-3.0.1-bin-hadoop3.2/data /opt/spark/data
COPY cudf-0.15-cuda10-1.jar /opt/sparkRapidsPlugin
COPY rapids-4-spark_2.12-0.2.0.jar /opt/sparkRapidsPlugin
COPY getGpusResources.sh /opt/sparkRapidsPlugin
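# The plugin jars and discovery script staged above get wired into Spark via
# configuration at submit time. A sketch (property names follow the RAPIDS
# Accelerator getting-started docs; paths match the COPY targets above):
# spark-submit \
#   --conf spark.plugins=com.nvidia.spark.SQLPlugin \
#   --conf spark.executor.resource.gpu.discoveryScript=/opt/sparkRapidsPlugin/getGpusResources.sh \
#   --jars /opt/sparkRapidsPlugin/rapids-4-spark_2.12-0.2.0.jar,/opt/sparkRapidsPlugin/cudf-0.15-cuda10-1.jar \
#   ...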
RUN mkdir /opt/spark/python
# TODO: Investigate running both pip and pip3 via virtualenvs
RUN apt-get update && \
    apt-get install -y python python-pip && \
    apt-get install -y python3 python3-pip && \
    # Remove ensurepip: pip is already installed on the image, so it adds no
    # functionality and only takes up ~1.6MB
    rm -r /usr/lib/python*/ensurepip && \
    pip install --upgrade pip setuptools && \
    # Python 3 packages can be installed with pip3
    # Remove the caches to save space
    rm -r /root/.cache && rm -rf /var/cache/apt/*
COPY spark-3.0.1-bin-hadoop3.2/python/pyspark /opt/spark/python/pyspark
COPY spark-3.0.1-bin-hadoop3.2/python/lib /opt/spark/python/lib
ENV SPARK_HOME /opt/spark
WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir
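# tini acts as a minimal init (PID 1) so signals are forwarded to the Spark
# process and zombie children are reaped.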
ENV TINI_VERSION v0.18.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/bin/tini
RUN chmod +rx /usr/bin/tini
ENTRYPOINT [ "/opt/entrypoint.sh" ]
# Specify the User that the actual main process will run as
USER ${spark_uid}
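# Example usage once the image is built and pushed (a sketch, assuming a
# GPU-enabled Kubernetes cluster; registry, image tag, and API server address
# are placeholders to adapt):
# spark-submit \
#   --master k8s://https://<k8s-apiserver>:<port> \
#   --deploy-mode cluster \
#   --conf spark.kubernetes.container.image=<registry>/spark-rapids:0.2.0-cuda10.1 \
#   --conf spark.executor.resource.gpu.amount=1 \
#   --conf spark.executor.resource.gpu.vendor=nvidia.com \
#   ...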