Skip to content

Commit

Permalink
Add Hive integration tests (apache#207)
Browse files Browse the repository at this point in the history
* Add Hive for CI

* Add Hive integration tests

* Add missing licenses

* Fix

* Remove Arrow

* Add catalog

* Update test suite

* Whitespace
  • Loading branch information
Fokko authored Jan 17, 2024
1 parent 7deb739 commit 06e2b2d
Show file tree
Hide file tree
Showing 10 changed files with 466 additions and 353 deletions.
10 changes: 4 additions & 6 deletions dev/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ WORKDIR ${SPARK_HOME}

ENV SPARK_VERSION=3.4.2
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.4_2.12
ENV ICEBERG_VERSION=1.4.0
ENV AWS_SDK_VERSION=2.20.18
ENV PYICEBERG_VERSION=0.4.0
ENV ICEBERG_VERSION=1.4.2
ENV PYICEBERG_VERSION=0.5.1

RUN curl --retry 3 -s -C - https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
Expand All @@ -51,8 +50,7 @@ RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runt
&& mv iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar /opt/spark/jars

# Download AWS bundle
RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
&& mv iceberg-aws-bundle-${ICEBERG_VERSION}.jar /opt/spark/jars
RUN curl -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar

COPY spark-defaults.conf /opt/spark/conf
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"
Expand All @@ -62,7 +60,7 @@ RUN chmod u+x /opt/spark/sbin/* && \

RUN pip3 install -q ipython

RUN pip3 install "pyiceberg[s3fs]==${PYICEBERG_VERSION}"
RUN pip3 install "pyiceberg[s3fs,hive]==${PYICEBERG_VERSION}"

COPY entrypoint.sh .
COPY provision.py .
Expand Down
14 changes: 14 additions & 0 deletions dev/docker-compose-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ services:
iceberg_net:
depends_on:
- rest
- hive
- minio
volumes:
- ./warehouse:/home/iceberg/warehouse
Expand All @@ -37,6 +38,7 @@ services:
- 8080:8080
links:
- rest:rest
- hive:hive
- minio:minio
rest:
image: tabulario/iceberg-rest
Expand Down Expand Up @@ -85,5 +87,17 @@ services:
/usr/bin/mc policy set public minio/warehouse;
tail -f /dev/null
"
# Hive service for the integration tests; per SERVICE_NAME below it is
# started in "metastore" mode.
hive:
# Image is built from the local hive/ directory rather than pulled.
build: hive/
container_name: hive
hostname: hive
networks:
iceberg_net:
ports:
# NOTE(review): presumably the metastore Thrift port; confirm against the image.
- 9083:9083
environment:
# NOTE(review): presumably read by the base image's entrypoint to choose
# which Hive service to launch; confirm against the apache/hive image docs.
SERVICE_NAME: "metastore"
# Sets the metastore warehouse directory to the s3a://warehouse/hive/ location.
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"

networks:
iceberg_net:
34 changes: 34 additions & 0 deletions dev/hive/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Stage 1: throwaway stage that only downloads the S3A jars, so curl never
# ends up in the final Hive image.
FROM openjdk:8-jre-slim AS build

# Install curl (plus CA certificates for HTTPS) and drop the apt lists in the
# same layer so they do not bloat the stage.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends ca-certificates curl \
    && rm -rf /var/lib/apt/lists/*

# AWSSDK_VERSION is the AWS SDK v1 bundle that is actually downloaded below.
# It was previously declared as 2.20.18 but never used, while the URL
# hard-coded 1.11.271.
# NOTE(review): 1.11.271 should be the bundle version hadoop-aws 3.1.0 was
# built against - confirm against the hadoop-aws POM before bumping either.
ENV AWSSDK_VERSION=1.11.271
ENV HADOOP_VERSION=3.1.0

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as a .jar; --retry smooths over transient Maven Central hiccups.
RUN curl -fsSL --retry 3 \
        "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWSSDK_VERSION}/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar" \
        -o "/tmp/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar"
RUN curl -fsSL --retry 3 \
        "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar" \
        -o "/tmp/hadoop-aws-${HADOOP_VERSION}.jar"


# Stage 2: the Hive image, extended with the S3A filesystem jars and config.
FROM apache/hive:3.1.3

# ENV does not carry over between stages, so the versions are repeated here;
# keep them in sync with the build stage above.
ENV AWSSDK_VERSION=1.11.271
ENV HADOOP_VERSION=3.1.0

# Jars land at the same paths as before: /opt/hive/lib/hadoop-aws-3.1.0.jar and
# /opt/hive/lib/aws-java-sdk-bundle-1.11.271.jar.
COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar
COPY --from=build /tmp/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar /opt/hive/lib/aws-java-sdk-bundle-${AWSSDK_VERSION}.jar
# Hadoop configuration from the build context (S3A settings).
COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml
53 changes: 53 additions & 0 deletions dev/hive/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!--
Hadoop core-site settings for the Hive container: the default filesystem is
an S3A location, and the S3A client is pointed at an HTTP endpoint on the
host "minio" with static credentials.
-->
<configuration>
<!-- Default filesystem: the "warehouse" bucket, under the hive/ prefix. -->
<property>
<name>fs.defaultFS</name>
<value>s3a://warehouse/hive</value>
</property>
<!-- Filesystem class that handles s3a:// URIs. -->
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3a.fast.upload</name>
<value>true</value>
</property>
<!-- S3 endpoint: plain HTTP on host "minio", port 9000 (not AWS). -->
<property>
<name>fs.s3a.endpoint</name>
<value>http://minio:9000</value>
</property>
<!--
Static credentials for the endpoint above.
NOTE(review): presumably these match the MinIO credentials configured for
the test stack; confirm against the compose file. Not for production use.
-->
<property>
<name>fs.s3a.access.key</name>
<value>admin</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>password</value>
</property>
<!-- TLS is disabled, consistent with the http:// endpoint above. -->
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>false</value>
</property>
<!--
NOTE(review): path-style access is presumably required because the endpoint
is a single host name rather than per-bucket virtual hosts; confirm.
-->
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
</configuration>
Loading

0 comments on commit 06e2b2d

Please sign in to comment.