From a64e9a5ed00303946b7f484b22a79dd01271a16c Mon Sep 17 00:00:00 2001 From: Armando Zhu Date: Wed, 7 Feb 2024 00:34:36 -0800 Subject: [PATCH] ci: Add test case for hdfs over gcs bucket (#4145) * ci: Add test case for hdfs over gcs bucket (#3504) * Update xml headers * Modify CLASSPATH --- .../services/hdfs/hdfs_default_gcs/action.yml | 62 +++++++++++++++++++ core/src/services/hdfs/docs.md | 1 + fixtures/hdfs/gcs-core-site.xml | 56 +++++++++++++++++ 3 files changed, 119 insertions(+) create mode 100644 .github/services/hdfs/hdfs_default_gcs/action.yml create mode 100644 fixtures/hdfs/gcs-core-site.xml diff --git a/.github/services/hdfs/hdfs_default_gcs/action.yml b/.github/services/hdfs/hdfs_default_gcs/action.yml new file mode 100644 index 000000000000..69aabeba75d8 --- /dev/null +++ b/.github/services/hdfs/hdfs_default_gcs/action.yml @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: hdfs_default_gcs +description: 'Behavior test for hdfs default over gcs' + +runs: + using: "composite" + steps: + - name: Setup java env + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "11" + - name: Load secrets + uses: 1password/load-secrets-action@v1 + with: + export-env: true + env: + OPENDAL_GCS_ROOT: op://services/gcs/root + OPENDAL_GCS_BUCKET: op://services/gcs/bucket + OPENDAL_GCS_CREDENTIAL: op://services/gcs/credential + - name: Setup + shell: bash + run: | + curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner + + export HADOOP_HOME="/home/runner/hadoop-3.3.5" + + curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-hadoop3-2.2.19-shaded.jar https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.19/gcs-connector-hadoop3-2.2.19-shaded.jar + + export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) + + cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml + cp ./fixtures/hdfs/gcs-core-site.xml ${HADOOP_HOME}/etc/hadoop/core-site.xml + + cat << EOF >> $GITHUB_ENV + HADOOP_HOME=${HADOOP_HOME} + CLASSPATH=${CLASSPATH} + LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native + OPENDAL_HDFS_ROOT=${OPENDAL_GCS_ROOT} + OPENDAL_HDFS_NAME_NODE=gs://${OPENDAL_GCS_BUCKET} + OPENDAL_HDFS_ENABLE_APPEND=false + EOF + + mkdir -p /tmp/hdfs + + echo ${OPENDAL_GCS_CREDENTIAL} | base64 -d > /tmp/hdfs/gcs-credentials.json diff --git a/core/src/services/hdfs/docs.md b/core/src/services/hdfs/docs.md index c9289159f5b9..7e4d88211281 100644 --- a/core/src/services/hdfs/docs.md +++ b/core/src/services/hdfs/docs.md @@ -121,6 +121,7 @@ async fn main() -> Result<()> { // Create fs backend builder. let mut builder = Hdfs::default(); // Set the name node for hdfs. + // If the string starts with a protocol type such as file://, hdfs://, or gs://, this protocol type will be used. 
builder.name_node("hdfs://127.0.0.1:9000"); // Set the root for hdfs, all operations will happen under this root. // diff --git a/fixtures/hdfs/gcs-core-site.xml b/fixtures/hdfs/gcs-core-site.xml new file mode 100644 index 000000000000..e8d7418a29e2 --- /dev/null +++ b/fixtures/hdfs/gcs-core-site.xml @@ -0,0 +1,56 @@ + + + + + + + + <configuration> + <property> + <name>fs.AbstractFileSystem.gs.impl</name> + <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value> + <description>The AbstractFileSystem for 'gs:' URIs.</description> + </property> + <property> + <name>fs.gs.project.id</name> + <value></value> + <description> + Optional. Google Cloud Project ID with access to GCS buckets. + Required only for list buckets and create bucket operations. + </description> + </property> + <property> + <name>google.cloud.auth.type</name> + <value>SERVICE_ACCOUNT_JSON_KEYFILE</value> + <description> + Authentication type to use for GCS access. + </description> + </property> + <property> + <name>google.cloud.auth.service.account.json.keyfile</name> + <value>/tmp/hdfs/gcs-credentials.json</value> + <description> + The JSON keyfile of the service account used for GCS + access when google.cloud.auth.type is SERVICE_ACCOUNT_JSON_KEYFILE. + </description> + </property>