Skip to content

Commit

Permalink
dars release 0.1.2
Browse files Browse the repository at this point in the history
Contains the following changes:

* MKL library updated from 2019.2.187 to 2019.4.243
* Implement CLR multi-stage, reducing image size by 10%.
* Stack update from CLR version 29480 to 30690
* Basic Environment variables for Hadoop and Spark incorporated.
* Basic Configuration files for Hadoop and Spark pointing to stateless folder.
  • Loading branch information
Leonardo Sandoval authored and gtkramer committed Aug 28, 2019
1 parent 8de3c3c commit 52b2efd
Show file tree
Hide file tree
Showing 21 changed files with 192 additions and 54 deletions.
96 changes: 68 additions & 28 deletions stacks/dars/mkl/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,38 +1,78 @@
# NOTE(review): this span is a rendered GitHub diff, not a buildable
# Dockerfile — removed (pre-commit) and added (post-commit) lines are
# interleaved with no +/- markers. Comments below tag which is which.

# NOTE(review): old single-stage header (removed by this commit);
# untagged FROM defaults to :latest (hadolint DL3006).
FROM clearlinux
LABEL maintainer="[email protected]"

# New multi-stage layout: "builder" installs a full OS tree under
# /install_root; the final stage copies only that tree.
FROM clearlinux:latest AS builder
ARG swupd_args

WORKDIR /root
# Move to latest Clear Linux release to ensure
# that the swupd command line arguments are
# correct
RUN swupd update --no-boot-update $swupd_args

# Grab os-release info from the minimal base image so
# that the new content matches the exact OS version
COPY --from=clearlinux/os-core:latest /usr/lib/os-release /

# Install additional content in a target directory
# using the os version from the minimal base
# NOTE(review): `source` is a bashism — this assumes the build shell is
# bash, not POSIX sh (which would need `.`). Confirm for Clear Linux.
RUN source /os-release && \
mkdir /install_root \
&& swupd os-install -V ${VERSION_ID} \
--path /install_root --statedir /swupd-state \
--bundles=big-data-basic,cpio,os-core-update,which --no-boot-update \
&& rm -rf /install_root/var/lib/swupd/*

# For some Host OS configuration with redirect_dir on,
# extra data are saved on the upper layer when the same
# file exists on different layers. To minimize docker
# image size, remove the overlapped files before copy.
RUN mkdir /os_core_install
COPY --from=clearlinux/os-core:latest / /os_core_install/
# NOTE(review): `&>` is a bash-only redirect; under plain /bin/sh it is
# parsed as background + truncate — confirm the build shell is bash.
RUN find / os_core_install | sed -e 's/os_core_install/install_root/' | xargs rm -d &> /dev/null || true

# Final (runtime) stage, based on the minimal os-core image.
FROM clearlinux/os-core:latest
# NOTE(review): malformed LABEL — missing key=value form; should read
# LABEL maintainer="[email protected]".
LABEL [email protected]

ENV HOME=/root

# NOTE(review): old (removed) ldconfig setup from the single-stage image.
# ldconfig configuration
COPY dars.ld.so.conf .
RUN cat dars.ld.so.conf >> /etc/ld.so.conf
# Configure openjdk11
ENV JAVA_HOME=/usr/lib/jvm/java-1.11.0-openjdk
ENV PATH="${JAVA_HOME}/bin:${PATH}"

# NOTE(review): old (removed) in-place update/bundle-add, superseded by
# the builder stage above.
# OS update and bundle installation
RUN swupd update $swupd_args && \
swupd bundle-add --skip-diskspace-check \
big-data-basic \
which
# Environment variables to point to Hadoop,
# Spark and YARN installation and configuration
ENV HADOOP_HOME=/usr
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
ENV HADOOP_DEFAULT_LIBEXEC_DIR=$HADOOP_HOME/libexec
ENV HADOOP_IDENT_STRING=root
ENV HADOOP_LOG_DIR=/var/log/hadoop
ENV HADOOP_PID_DIR=/var/log/hadoop/pid
ENV HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

COPY profile /etc/profile
ENV HDFS_DATANODE_USER=root
ENV HDFS_NAMENODE_USER=root
ENV HDFS_SECONDARYNAMENODE_USER=root

ENV SPARK_HOME=/usr/share/apache-spark
ENV SPARK_CONF_DIR=/etc/spark

# NOTE(review): old (removed) online-installer path for MKL 2019.2.187.
# Plain http:// download with no checksum verification — would be a
# supply-chain risk if it were still live.
# start: MKL specific
ENV MKL_INSTALLER=http://registrationcenter-download.intel.com/akdlm/irc_nas/tec/15095/l_mkl_2019.2.187_online.tgz
ENV MKL_WRAPPER=https://github.com/Intel-bigdata/mkl_wrapper_for_non_CDH/raw/master
ENV MKL_TARGET_DIR=/opt/intel/mkl/wrapper
ENV YARN_RESOURCEMANAGER_USER=root
ENV YARN_NODEMANAGER_USER=root

COPY silent.cfg .
RUN swupd bundle-add curl cpio && \
curl ${MKL_INSTALLER} -o l_mkl.tgz && \
mkdir l_mkl && \
tar -xvf l_mkl.tgz -C l_mkl --strip-components=1 && \
l_mkl/install.sh -s silent.cfg && \
rm -rf l_mkl && \
mkdir -p ${MKL_TARGET_DIR} && \
curl -L ${MKL_WRAPPER}/mkl_wrapper.jar -o ${MKL_TARGET_DIR}/mkl_wrapper.jar && \
curl -L ${MKL_WRAPPER}/mkl_wrapper.so -o ${MKL_TARGET_DIR}/mkl_wrapper.so && \
# NOTE(review): diff artifact — the removed RUN above ends with an
# orphan `&& \` continuation directly before the added COPY below.
COPY --from=builder /install_root /

COPY dars.ld.so.conf /etc/ld.so.conf

# New flow: MKL 2019.4.243 installer is shipped in-repo under binaries/
# instead of being downloaded at build time.
COPY silent.cfg binaries/l_mkl_2019.4.243_online.tgz /

# NOTE(review): no WORKDIR is set in this stage, so mkdir/tar run in the
# image's default working directory (/).
RUN mkdir mkl && tar -xvf l_mkl_2019.4.243_online.tgz -C mkl --strip-components=1 && \
mkl/install.sh -s silent.cfg && \
rm -rf silent.cfg l_mkl_2019.4.243_online.tgz mkl && \
ldconfig

# NOTE(review): two CMDs appear because the old (/bin/sh) and new
# (/bin/bash) lines are both rendered; in a real Dockerfile only the
# last CMD takes effect.
CMD ["/bin/sh"]
COPY binaries/mkl_wrapper.so binaries/mkl_wrapper.jar /opt/intel/mkl/wrapper/

RUN mkdir -p /etc/spark /etc/hadoop

COPY spark_conf/* /etc/spark/
COPY hadoop_conf/* /etc/hadoop/

CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion stacks/dars/mkl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ docker build --no-cache -t clearlinux/stacks-dars-mkl .

* `swupd_args` specifies [swupd update](https://github.com/clearlinux/swupd-client/blob/master/docs/swupd.1.rst#options) flags passed to the update during build.

>NOTE: An empty `swupd_args` will default to Clear Linux OS latest version. Consider this when building from the Dockerfile, as an OS update will be performed. The docker image in this registry was built and validated using version 29480.
>NOTE: An empty `swupd_args` will default to Clear Linux OS latest version. Consider this when building from the Dockerfile, as an OS update will be performed. The docker image in this registry was built and validated using version `30690`.
Binary file not shown.
Binary file added stacks/dars/mkl/binaries/mkl_wrapper.jar
Binary file not shown.
Binary file added stacks/dars/mkl/binaries/mkl_wrapper.so
Binary file not shown.
6 changes: 6 additions & 0 deletions stacks/dars/mkl/hadoop_conf/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop core-site.xml for a single-node setup: points the default
     filesystem at an HDFS namenode on localhost:9000. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
6 changes: 6 additions & 0 deletions stacks/dars/mkl/hadoop_conf/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop hdfs-site.xml: block replication factor of 1, appropriate
     for a single-datanode (pseudo-distributed) deployment. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
6 changes: 6 additions & 0 deletions stacks/dars/mkl/hadoop_conf/mapred-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop mapred-site.xml: run MapReduce jobs on YARN rather than the
     local job runner. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
1 change: 1 addition & 0 deletions stacks/dars/mkl/hadoop_conf/workers
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
localhost
6 changes: 6 additions & 0 deletions stacks/dars/mkl/hadoop_conf/yarn-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop yarn-site.xml: enable the MapReduce shuffle auxiliary
     service on the NodeManager. -->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
3 changes: 3 additions & 0 deletions stacks/dars/mkl/spark_conf/spark-defaults.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# MKL flags
# Point netlib-java's BLAS/LAPACK implementation system properties at the
# Intel MKL wrapper classes, for both executor and driver JVMs.
spark.executor.extraJavaOptions=-Dcom.github.fommil.netlib.BLAS=com.intel.mkl.MKLBLAS -Dcom.github.fommil.netlib.LAPACK=com.intel.mkl.MKLLAPACK
spark.driver.extraJavaOptions=-Dcom.github.fommil.netlib.BLAS=com.intel.mkl.MKLBLAS -Dcom.github.fommil.netlib.LAPACK=com.intel.mkl.MKLLAPACK
2 changes: 2 additions & 0 deletions stacks/dars/mkl/spark_conf/spark-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# spark-env.sh is sourced by the Spark launch scripts; a variable must be
# exported to reach the JVM (and the MKL native library inside it, which
# reads MKL_NUM_THREADS from the process environment). The original
# unexported assignment never propagates — note the adjacent
# LD_LIBRARY_PATH line already uses export.
export MKL_NUM_THREADS=1
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/native
86 changes: 62 additions & 24 deletions stacks/dars/openblas/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,33 +1,71 @@
# NOTE(review): rendered GitHub diff — removed (pre-commit) and added
# (post-commit) lines are interleaved with no +/- markers; this span is
# not buildable as-is.

# Old single-stage header (removed); untagged FROM (hadolint DL3006).
FROM clearlinux
LABEL maintainer="[email protected]"

# New builder stage: installs the full OS tree under /install_root.
FROM clearlinux:latest AS builder
ARG swupd_args

WORKDIR /root
# Move to latest Clear Linux release to ensure
# that the swupd command line arguments are
# correct
RUN swupd update --no-boot-update $swupd_args

# Grab os-release info from the minimal base image so
# that the new content matches the exact OS version
COPY --from=clearlinux/os-core:latest /usr/lib/os-release /

# Install additional content in a target directory
# using the os version from the minimal base
# NOTE(review): `source` is a bashism — assumes the build shell is bash.
RUN source /os-release && \
mkdir /install_root \
&& swupd os-install -V ${VERSION_ID} \
--path /install_root --statedir /swupd-state \
--bundles=big-data-basic,cpio,os-core-update,python-basic-dev,which --no-boot-update \
&& rm -rf /install_root/var/lib/swupd/*

# For some Host OS configuration with redirect_dir on,
# extra data are saved on the upper layer when the same
# file exists on different layers. To minimize docker
# image size, remove the overlapped files before copy.
RUN mkdir /os_core_install
COPY --from=clearlinux/os-core:latest / /os_core_install/
# NOTE(review): `&>` is a bash-only redirect — confirm the build shell.
RUN find / os_core_install | sed -e 's/os_core_install/install_root/' | xargs rm -d &> /dev/null || true

# Final (runtime) stage on the minimal os-core image.
FROM clearlinux/os-core:latest
# NOTE(review): malformed LABEL — missing key=value form; should read
# LABEL maintainer="[email protected]".
LABEL [email protected]

ENV HOME=/root

# Configure openjdk11
ENV JAVA_HOME=/usr/lib/jvm/java-1.11.0-openjdk
ENV PATH="${JAVA_HOME}/bin:${PATH}"

# Environment variables to point to Hadoop,
# Spark and YARN installation and configuration
ENV HADOOP_HOME=/usr
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
ENV HADOOP_DEFAULT_LIBEXEC_DIR=$HADOOP_HOME/libexec
ENV HADOOP_IDENT_STRING=root
ENV HADOOP_LOG_DIR=/var/log/hadoop
ENV HADOOP_PID_DIR=/var/log/hadoop/pid
ENV HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

ENV HDFS_DATANODE_USER=root
ENV HDFS_NAMENODE_USER=root
ENV HDFS_SECONDARYNAMENODE_USER=root

ENV SPARK_HOME=/usr/share/apache-spark
ENV SPARK_CONF_DIR=/etc/spark

# NOTE(review): old (removed) ldconfig setup from the single-stage image.
# ldconfig configuration
COPY dars.ld.so.conf .
RUN cat dars.ld.so.conf >> /etc/ld.so.conf
ENV YARN_RESOURCEMANAGER_USER=root
ENV YARN_NODEMANAGER_USER=root

# NOTE(review): old (removed) in-place update/bundle-add, superseded by
# the builder stage above.
# OS update and bundle installation
RUN swupd update $swupd_args && \
swupd bundle-add --skip-diskspace-check \
big-data-basic \
which
COPY --from=builder /install_root /

COPY profile /etc/profile
COPY dars.ld.so.conf /etc/ld.so.conf

RUN ldconfig

# start: OpenBLAS specific
ENV OPENBLAS_AVX512=/usr/lib64/haswell/avx512_1/libopenblas_skylakexp-r0.3.5.so
RUN mkdir -p /etc/spark /etc/hadoop

# NOTE(review): old (removed) workaround block.
# TODO: remove these symlinks once the Clear Linux team fixes the bundle
RUN swupd bundle-add --skip-diskspace-check \
python-basic-dev && \
ln -sf ${OPENBLAS_AVX512} /usr/lib64/haswell/libblas.so && \
ln -sf ${OPENBLAS_AVX512} /usr/lib64/haswell/libblas.so.3 && \
ln -sf ${OPENBLAS_AVX512} /usr/lib64/haswell/liblapack.so && \
ln -sf ${OPENBLAS_AVX512} /usr/lib64/haswell/liblapack.so.3 && \
ldconfig
COPY spark_conf/* /etc/spark/
COPY hadoop_conf/* /etc/hadoop/

# NOTE(review): old CMD (/bin/sh) and new CMD (/bin/bash) both rendered;
# only the last CMD would take effect in a real Dockerfile.
CMD ["/bin/sh"]
CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion stacks/dars/openblas/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ docker build --no-cache -t clearlinux/stacks-dars-openblas .

* `swupd_args` specifies [swupd update](https://github.com/clearlinux/swupd-client/blob/master/docs/swupd.1.rst#options) flags passed to the update during build.

>NOTE: An empty `swupd_args` will default to Clear Linux OS latest version. Consider this when building from the Dockerfile, as an OS update will be performed. The docker image in this registry was built and validated using version 29480.
>NOTE: An empty `swupd_args` will default to Clear Linux OS latest version. Consider this when building from the Dockerfile, as an OS update will be performed. The docker image in this registry was built and validated using version `30690`.
6 changes: 6 additions & 0 deletions stacks/dars/openblas/hadoop_conf/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop core-site.xml for a single-node setup: points the default
     filesystem at an HDFS namenode on localhost:9000. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
6 changes: 6 additions & 0 deletions stacks/dars/openblas/hadoop_conf/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop hdfs-site.xml: block replication factor of 1, appropriate
     for a single-datanode (pseudo-distributed) deployment. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
6 changes: 6 additions & 0 deletions stacks/dars/openblas/hadoop_conf/mapred-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop mapred-site.xml: run MapReduce jobs on YARN rather than the
     local job runner. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
1 change: 1 addition & 0 deletions stacks/dars/openblas/hadoop_conf/workers
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
localhost
6 changes: 6 additions & 0 deletions stacks/dars/openblas/hadoop_conf/yarn-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Hadoop yarn-site.xml: enable the MapReduce shuffle auxiliary
     service on the NodeManager. -->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
3 changes: 3 additions & 0 deletions stacks/dars/openblas/spark_conf/spark-defaults.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# OpenBlas confs
# Point netlib-java's BLAS/LAPACK/ARPACK implementation system properties
# at the native-system (OpenBLAS-backed) classes, for both executor and
# driver JVMs.
spark.executor.extraJavaOptions=-Dcom.github.fommil.netlib.BLAS=com.github.fommil.netlib.NativeSystemBLAS -Dcom.github.fommil.netlib.LAPACK=com.github.fommil.netlib.NativeSystemLAPACK -Dcom.github.fommil.netlib.ARPACK=com.github.fommil.netlib.NativeSystemARPACK
spark.driver.extraJavaOptions=-Dcom.github.fommil.netlib.BLAS=com.github.fommil.netlib.NativeSystemBLAS -Dcom.github.fommil.netlib.LAPACK=com.github.fommil.netlib.NativeSystemLAPACK -Dcom.github.fommil.netlib.ARPACK=com.github.fommil.netlib.NativeSystemARPACK
2 changes: 2 additions & 0 deletions stacks/dars/openblas/spark_conf/spark-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# spark-env.sh is sourced by the Spark launch scripts; a variable must be
# exported to reach the JVM (and the OpenBLAS native library inside it,
# which reads OPENBLAS_NUM_THREADS from the process environment). The
# original unexported assignment never propagates — note the adjacent
# LD_LIBRARY_PATH line already uses export.
export OPENBLAS_NUM_THREADS=1
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/native

0 comments on commit 52b2efd

Please sign in to comment.