From 0d3f5cbe1e1bf77179234ddd7ec01ed10b00b391 Mon Sep 17 00:00:00 2001
From: Robert Kruszewski
Date: Sat, 8 Dec 2018 01:36:09 +0000
Subject: [PATCH] Update build docker images (#458)

---
 .circleci/config.yml                          | 14 +++++++-------
 dev/docker-images/Makefile                    |  6 +++---
 dev/docker-images/base/Dockerfile             |  5 +++--
 dev/docker-images/python/Dockerfile           |  7 +++----
 dev/docker-images/r/Dockerfile                |  8 ++------
 python/pyspark/tests.py                       |  6 +++---
 .../spark/deploy/yarn/YarnClusterSuite.scala  |  8 ++++----
 7 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2374e59e6a886..43f2d58acdf31 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,7 +2,7 @@ version: 2
 
 defaults: &defaults
   docker:
-    - image: palantirtechnologies/circle-spark-base
+    - image: palantirtechnologies/circle-spark-base:0.1.0
   resource_class: xlarge
   environment: &defaults-environment
     TERM: dumb
@@ -128,7 +128,7 @@ jobs:
     <<: *defaults
     # Some part of the maven setup fails if there's no R, so we need to use the R image here
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       # Saves us from recompiling every time...
       - restore_cache:
@@ -300,7 +300,7 @@ jobs:
     # depends on build-sbt, but we only need the assembly jars
     <<: *defaults
     docker:
-      - image: palantirtechnologies/circle-spark-python
+      - image: palantirtechnologies/circle-spark-python:0.1.0
     parallelism: 2
     steps:
       - *checkout-code
@@ -325,7 +325,7 @@ jobs:
     # depends on build-sbt, but we only need the assembly jars
     <<: *defaults
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       - *checkout-code
      - attach_workspace:
@@ -438,7 +438,7 @@ jobs:
     <<: *defaults
     # Some part of the maven setup fails if there's no R, so we need to use the R image here
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       - *checkout-code
       - restore_cache:
@@ -458,7 +458,7 @@ jobs:
   deploy-gradle:
     <<: *defaults
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       - *checkout-code
       - *restore-gradle-wrapper-cache
@@ -470,7 +470,7 @@ jobs:
     <<: *defaults
     # Some part of the maven setup fails if there's no R, so we need to use the R image here
     docker:
-      - image: palantirtechnologies/circle-spark-r
+      - image: palantirtechnologies/circle-spark-r:0.1.0
     steps:
       # This cache contains the whole project after version was set and mvn package was called
       # Restoring first (and instead of checkout) as mvn versions:set mutates real source code...
diff --git a/dev/docker-images/Makefile b/dev/docker-images/Makefile
index eb1692f810620..ed3e3a5ee7687 100644
--- a/dev/docker-images/Makefile
+++ b/dev/docker-images/Makefile
@@ -17,9 +17,9 @@
 
 .PHONY: all publish base python r
 
-BASE_IMAGE_NAME = palantirtechnologies/circle-spark-base
-PYTHON_IMAGE_NAME = palantirtechnologies/circle-spark-python
-R_IMAGE_NAME = palantirtechnologies/circle-spark-r
+BASE_IMAGE_NAME = palantirtechnologies/circle-spark-base:0.1.0
+PYTHON_IMAGE_NAME = palantirtechnologies/circle-spark-python:0.1.0
+R_IMAGE_NAME = palantirtechnologies/circle-spark-r:0.1.0
 
 all: base python r
 
diff --git a/dev/docker-images/base/Dockerfile b/dev/docker-images/base/Dockerfile
index e4515ca510b23..0e84ec665fcd7 100644
--- a/dev/docker-images/base/Dockerfile
+++ b/dev/docker-images/base/Dockerfile
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-FROM buildpack-deps:xenial
+FROM buildpack-deps:cosmic
 
 # make Apt non-interactive
 RUN echo 'APT::Get::Assume-Yes "true";' > /etc/apt/apt.conf.d/90circleci \
@@ -107,10 +107,11 @@ WORKDIR $CIRCLE_HOME
 # Install miniconda, we are using it to test conda support and a bunch of tests expect CONDA_BIN to be set
 ENV CONDA_ROOT=$CIRCLE_HOME/miniconda
 ENV CONDA_BIN=$CIRCLE_HOME/miniconda/bin/conda
-ENV MINICONDA2_VERSION=4.3.31
+ENV MINICONDA2_VERSION=4.5.11
 RUN curl -sO https://repo.continuum.io/miniconda/Miniconda2-${MINICONDA2_VERSION}-Linux-x86_64.sh \
   && bash Miniconda2-${MINICONDA2_VERSION}-Linux-x86_64.sh -b -p ${CONDA_ROOT} \
   && $CONDA_BIN clean --all \
+  && sudo mkdir -m 777 /home/.conda \
   && rm -f Miniconda2-${MINICONDA2_VERSION}-Linux-x86_64.sh
 
 # END IMAGE CUSTOMIZATIONS
diff --git a/dev/docker-images/python/Dockerfile b/dev/docker-images/python/Dockerfile
index d5674357cce69..cb44b373617da 100644
--- a/dev/docker-images/python/Dockerfile
+++ b/dev/docker-images/python/Dockerfile
@@ -21,15 +21,14 @@ FROM palantirtechnologies/circle-spark-base
 ENV PATH="$CIRCLE_HOME/.pyenv/bin:$PATH"
 RUN curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash \
   && cat >>.bashrc <<<'eval "$($HOME/.pyenv/bin/pyenv init -)"' \
-  && cat >>.bashrc <<<'eval "$($HOME/.pyenv/bin/pyenv virtualenv-init -)"' \
-  && pyenv doctor
+  && cat >>.bashrc <<<'eval "$($HOME/.pyenv/bin/pyenv virtualenv-init -)"'
 
 # Must install numpy 1.11 or else a bunch of tests break due to different output formatting on e.g. nparray
 # A version I've tested earlier that I know it breaks with is 1.14.1
 RUN mkdir -p $(pyenv root)/versions \
   && ln -s $CONDA_ROOT $(pyenv root)/versions/our-miniconda \
-  && $CONDA_BIN create -y -n python2 -c anaconda -c conda-forge python==2.7.11 numpy=1.11.2 pyarrow==0.8.0 pandas nomkl \
-  && $CONDA_BIN create -y -n python3 -c anaconda -c conda-forge python=3.6 numpy=1.11.2 pyarrow==0.8.0 pandas nomkl \
+  && $CONDA_BIN create -y -n python2 -c anaconda -c conda-forge python==2.7.15 numpy=1.14.0 pyarrow==0.8.0 pandas nomkl \
+  && $CONDA_BIN create -y -n python3 -c anaconda -c conda-forge python=3.6 numpy=1.14.0 pyarrow==0.8.0 pandas nomkl \
   && $CONDA_BIN clean --all
 
 RUN pyenv global our-miniconda/envs/python2 our-miniconda/envs/python3 \
diff --git a/dev/docker-images/r/Dockerfile b/dev/docker-images/r/Dockerfile
index e660de1de8de4..a4353b3602ddb 100644
--- a/dev/docker-images/r/Dockerfile
+++ b/dev/docker-images/r/Dockerfile
@@ -20,12 +20,8 @@ FROM palantirtechnologies/circle-spark-base
 USER root
 
 ### Install R
-RUN echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list \
-  && apt-get update \
-  && gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 \
-  && gpg -a --export E084DAB9 | sudo apt-key add - \
-  && apt-get update \
-  && apt-get --assume-yes install r-base r-base-dev qpdf \
+RUN apt-get update \
+  && apt-get install r-base r-base-dev qpdf \
   && rm -rf /var/lib/apt/lists/* \
   && chmod 777 /usr/local/lib/R/site-library \
   && /usr/lib/R/bin/R -e "install.packages(c('devtools'), repos='http://cran.us.r-project.org', lib='/usr/local/lib/R/site-library'); devtools::install_github('r-lib/testthat@v2.0.0', lib='/usr/local/lib/R/site-library'); install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival', 'roxygen2', 'lintr'), repos='http://cran.us.r-project.org', lib='/usr/local/lib/R/site-library')"
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 8397a6eb46ba7..c15d443ebbba9 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -2279,20 +2279,20 @@ def test_conda(self):
             |from pyspark import SparkContext
             |
             |sc = SparkContext()
-            |sc.addCondaPackages('numpy=1.11.1')
+            |sc.addCondaPackages('numpy=1.14.0')
             |
             |# Ensure numpy is accessible on the driver
             |import numpy
             |arr = [1, 2, 3]
             |def mul2(x):
             |    # Also ensure numpy accessible from executor
-            |    assert numpy.version.version == "1.11.1"
+            |    assert numpy.version.version == "1.14.0"
             |    return x * 2
             |print(sc.parallelize(arr).map(mul2).collect())
             """)
         props = self.createTempFile("properties", """
             |spark.conda.binaryPath {}
-            |spark.conda.channelUrls https://repo.continuum.io/pkgs/free
+            |spark.conda.channelUrls https://repo.continuum.io/pkgs/main
             |spark.conda.bootstrapPackages python=3.5
             """.format(os.environ["CONDA_BIN"]))
         env = dict(os.environ)
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index f6d3b75a0d4bd..11c06b7f0f906 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -84,14 +84,14 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     |    exit(-1)
     |  sc = SparkContext(conf=SparkConf())
     |
-    |  sc.addCondaPackages('numpy=1.11.1')
+    |  sc.addCondaPackages('numpy=1.14.0')
     |  import numpy
     |
     |  status = open(sys.argv[1],'w')
     |
     |  # Addict exists only in external-conda-forge, not anaconda
     |  sc.addCondaChannel("https://conda.anaconda.org/conda-forge")
-    |  sc.addCondaPackages('addict=1.0.0')
+    |  sc.addCondaPackages('addict=2.2.0')
     |
     |  def numpy_multiply(x):
     |    # Ensure package from non-base channel is installed
@@ -376,8 +376,8 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
 
     val extraConf: Map[String, String] = Map(
       "spark.conda.binaryPath" -> sys.env("CONDA_BIN"),
-      "spark.conda.channelUrls" -> "https://repo.continuum.io/pkgs/free",
-      "spark.conda.bootstrapPackages" -> "python=3.5"
+      "spark.conda.channelUrls" -> "https://repo.continuum.io/pkgs/main",
+      "spark.conda.bootstrapPackages" -> "python=3.6"
     )
 
     val moduleDir =
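
Usage sketch: with the image names above now carrying the 0.1.0 tag, rebuilding and republishing the CI images would go through the Makefile targets declared in this diff (all, publish). A minimal sketch, assuming Docker is installed and credentials for the palantirtechnologies registry are already configured; the exact behaviour of the publish target is not shown in the hunk above:

    # from the repository root
    cd dev/docker-images
    make all       # builds circle-spark-base, circle-spark-python and circle-spark-r with the 0.1.0 tag
    make publish   # presumably pushes the freshly tagged images to the registry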