Dockerfile
FROM python:3.8
ARG PROJECT_HOME=/root/airflow
WORKDIR ${PROJECT_HOME}/
#### Airflow
RUN apt-get update && apt-get -y install build-essential
RUN pip install apache-airflow[mysql,crypto]==1.10.12 --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.7.txt"
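# The constraints file pins Airflow's transitive dependencies to the versions
# tested for the 1.10.12 release.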
#### Spark
ARG SPARK_NAME=spark-3.0.1-bin-hadoop2.7
ARG SPARK_EXT_ZIP=tgz
ARG SPARK_FILE_NAME=${SPARK_NAME}.${SPARK_EXT_ZIP}
# use the permanent Apache archive; rotating mirrors drop old releases
RUN wget https://archive.apache.org/dist/spark/spark-3.0.1/${SPARK_FILE_NAME}
RUN tar -xvzf ${SPARK_FILE_NAME}
# for jdbc
#RUN cp jars/* ${SPARK_NAME}/jars/
# SPARK_HOME must be absolute, and PATH must be set with ENV
# (a RUN export only lasts for that single build step)
ENV SPARK_HOME ${PROJECT_HOME}/${SPARK_NAME}
ENV PATH $PATH:${SPARK_HOME}/bin:${SPARK_HOME}/sbin
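# With Spark on the PATH, spark-submit can be run directly inside the container,
# e.g. (illustrative only; the script name below is hypothetical):
#   spark-submit --master local[*] tasks/example_job.py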
#### Mysql
RUN pip install mysql-connector-python
RUN pip install pymysql
#RUN add-apt-repository "deb http://repo.mysql.com/apt/ubuntu/ bionic mysql-8.0"
#RUN apt-get install mysql-client -y
RUN apt-get install -y default-mysql-client
# Core project
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
#RUN pip install pyspark
#RUN pip install pandas
#RUN pip install pandas-datareader
#RUN pip install bs4
#RUN pip install sqlalchemy
RUN pip install pytest
# front-end
RUN pip install Flask
RUN pip install jupyter
COPY setup.sh /root/airflow/setup.sh
RUN chmod +x setup.sh
COPY airflow_test.sh airflow_test.sh
RUN chmod +x airflow_test.sh
COPY config.cnf config.cnf
COPY dags dags
COPY sql sql
COPY tasks tasks
COPY flaskapp flaskapp
COPY db db
COPY data data
COPY tests tests
COPY jars jars
# copy the JDBC driver jars into Spark's jars directory
RUN cp jars/* ${SPARK_NAME}/jars/
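# Optional sanity check that the driver jars landed next to Spark's own jars:
#RUN ls ${SPARK_NAME}/jars/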
###############################
## Begin JAVA installation
###############################
# Java is required for spark-submit to work
# Install OpenJDK-8
RUN apt-get update && \
apt-get install -y software-properties-common && \
apt-get install -y gnupg2 && \
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys EB9B1D8886F44E2A && \
add-apt-repository "deb http://security.debian.org/debian-security stretch/updates main" && \
apt-get update && \
apt-get install -y openjdk-8-jdk && \
pip freeze && \
java -version && \
javac -version
# Setup JAVA_HOME
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
# (ENV persists into the image; no RUN export is needed)
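###############################
## Usage (sketch)
###############################
# A minimal sketch of how this image might be built and run; the exposed port
# and the role of setup.sh (assumed to initialise the Airflow DB and start the
# webserver/scheduler) are assumptions, not confirmed by this file:
#   docker build -t airflow-spark .
#   docker run -it -p 8080:8080 airflow-spark ./setup.sh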