Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Trino + Spark Views example #143

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions trino-example/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Route the Trino image archive through Git LFS and mark it as non-text (-text).
trino-410-views.tar.gz filter=lfs diff=lfs merge=lfs -text
52 changes: 52 additions & 0 deletions trino-example/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Compiled class file
*.class

# Log file
*.log

# BlueJ files
*.ctxt

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar

# Keep the Trino image archive that the README loads with `docker load`.
# It is tracked through Git LFS (see .gitattributes), so it must not be caught
# by the *.tar.gz rule above. This negation has to come after that rule.
!trino-410-views.tar.gz

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
replay_pid*

.gradle
**/build/
!src/**/build/

# Ignore Gradle GUI config
gradle-app.setting

# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
!gradle-wrapper.jar

# Avoid ignoring Gradle wrapper properties
!gradle-wrapper.properties

# Cache of project
.gradletasknamecache

# Eclipse Gradle plugin generated files
# Eclipse Core
.project
# JDT-specific (Eclipse Java Development Tools)
.classpath

# MacOS
.DS_Store

# IDEA
.idea
34 changes: 34 additions & 0 deletions trino-example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Iceberg Trino Demo

This demo extends the Iceberg demo setup to include Trino.

Clone this repository, change into the `trino-example` directory, and start up the docker environment.
```sh
git clone git@github.com:tabular-io/docker-spark-iceberg.git

cd docker-spark-iceberg/trino-example

docker load < trino-410-views.tar.gz

docker compose up
```

102 changes: 102 additions & 0 deletions trino-example/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
version: "3"

# Demo stack: Trino and Spark sharing one Iceberg REST catalog, with MinIO as
# the S3-compatible object store. All services join the `iceberg_net` network
# and reach each other by service name (rest, minio, ...).
services:
  # Trino server; its configuration comes from the local ./etc directory.
  # NOTE(review): presumably this image tag is the one provided by the archive
  # the README loads with `docker load` - confirm the tag matches.
  trino-iceberg:
    image: 'trino:iceberg-view-support-amd64'
    container_name: trino-iceberg
    ports:
      - "8080:8080"
    volumes:
      - ./etc:/etc/trino
    networks:
      iceberg_net:

  # Spark + notebook environment, built from ../spark.
  spark-iceberg:
    image: tabulario/spark-iceberg
    container_name: spark-iceberg
    build: ../spark
    networks:
      iceberg_net:
    depends_on:
      - rest
      - minio
    volumes:
      - ./warehouse:/home/iceberg/warehouse
      - ./notebooks:/home/iceberg/notebooks/notebooks
      - ./trino:/usr/local/bin/trino
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    ports:
      - "8888:8888"
      # Container port 8080 is published on host port 8081 because host port
      # 8080 is already used by trino-iceberg.
      - "8081:8080"
      - "10000:10000"
      - "10001:10001"

  # Iceberg REST catalog; stores table data in the `warehouse` bucket on MinIO.
  rest:
    image: tabulario/iceberg-rest
    container_name: iceberg-rest
    networks:
      iceberg_net:
    ports:
      - "8181:8181"
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - CATALOG_WAREHOUSE=s3://warehouse/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
      - CATALOG_S3_ENDPOINT=http://minio:9000

  # MinIO object store: S3 API on 9000, web console on 9001.
  minio:
    image: minio/minio
    container_name: minio
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
    networks:
      iceberg_net:
        # Lets `warehouse.minio` resolve to this container (presumably for
        # bucket-as-subdomain S3 requests - confirm).
        aliases:
          - warehouse.minio
    ports:
      - "9001:9001"
      - "9000:9000"
    command: ["server", "/data", "--console-address", ":9001"]

  # One-shot bucket bootstrap: retries until MinIO accepts the host config,
  # then removes and recreates the `warehouse` bucket and sets its policy to
  # public. `tail -f /dev/null` keeps the container running afterwards.
  mc:
    depends_on:
      - minio
    image: minio/mc
    container_name: mc
    networks:
      iceberg_net:
    environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc rm -r --force minio/warehouse;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc policy set public minio/warehouse;
      tail -f /dev/null
      "

# Network shared by every service above.
networks:
  iceberg_net:
10 changes: 10 additions & 0 deletions trino-example/etc/catalog/iceberg.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Iceberg connector configured against a REST catalog service.
connector.name=iceberg
iceberg.catalog.type=rest
# The catalog is reached by hostname `rest` on port 8181.
iceberg.rest-catalog.uri=http://rest:8181

# Object storage: S3-compatible endpoint at minio:9000 with static credentials.
# NOTE(review): admin/password look like demo-only credentials - do not reuse.
fs.native-s3.enabled=true
s3.endpoint=http://minio:9000
s3.aws-access-key=admin
s3.aws-secret-key=password
s3.path-style-access=true
s3.region=us-east-1
2 changes: 2 additions & 0 deletions trino-example/etc/catalog/tpcds.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Catalog definition using the tpcds connector (etc/catalog/tpcds.properties).
connector.name=tpcds
tpcds.splits-per-node=4
2 changes: 2 additions & 0 deletions trino-example/etc/catalog/tpch.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Catalog definition using the tpch connector (etc/catalog/tpch.properties).
connector.name=tpch
tpch.splits-per-node=4
6 changes: 6 additions & 0 deletions trino-example/etc/config.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Single-node install config: the one node is the coordinator and is also
# included in scheduling.
coordinator=true
node-scheduler.include-coordinator=true
http-server.http.port=8080
# Discovery points back at this same node's HTTP port (8080).
discovery-server.enabled=true
discovery.uri=http://localhost:8080
20 changes: 20 additions & 0 deletions trino-example/etc/jvm.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# JVM options for the Trino server, one option per line.
-server
# NOTE(review): -Xmx1G sets an explicit 1 GiB heap cap; presumably it takes
# precedence over the RAM-percentage flags below - confirm which is intended.
-Xmx1G
-XX:InitialRAMPercentage=80
-XX:MaxRAMPercentage=80
-XX:G1HeapRegionSize=32M
-XX:+ExplicitGCInvokesConcurrent
# Exit the JVM, and write a heap dump, on OutOfMemoryError.
-XX:+ExitOnOutOfMemoryError
-XX:+HeapDumpOnOutOfMemoryError
-XX:-OmitStackTraceInFastThrow
-XX:ReservedCodeCacheSize=512M
-XX:PerMethodRecompilationCutoff=10000
-XX:PerBytecodeRecompilationCutoff=10000
-Djdk.attach.allowAttachSelf=true
-Djdk.nio.maxCachedBufferSize=2000000
-Dfile.encoding=UTF-8
# Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647)
-XX:+UnlockDiagnosticVMOptions
-XX:GCLockerRetryAllocationCount=32
# Allow loading dynamic agent used by JOL
-XX:+EnableDynamicAgentLoading
2 changes: 2 additions & 0 deletions trino-example/etc/log.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Enable verbose logging from Trino
#io.trino=DEBUG
3 changes: 3 additions & 0 deletions trino-example/etc/node.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Node settings: environment name, data directory and plugin directory.
node.environment=docker
node.data-dir=/data/trino
plugin.dir=/usr/lib/trino/plugin
Loading
Loading