diff --git a/.github/services/hdfs/hdfs_default_gcs/action.yml b/.github/services/hdfs/hdfs_default_gcs/action.yml new file mode 100644 index 000000000000..c8511d28060d --- /dev/null +++ b/.github/services/hdfs/hdfs_default_gcs/action.yml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: hdfs_default_gcs +description: 'Behavior test for hdfs default over gcs' + +runs: + using: "composite" + steps: + - name: Setup java env + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "11" + - name: Setup + shell: bash + run: | + curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner + + export HADOOP_HOME="/home/runner/hadoop-3.3.5" + # Download the GCS connector before computing CLASSPATH: "hadoop classpath --glob" only lists jars present when it runs. + curl -LsSf -o ${HADOOP_HOME}/share/hadoop/common/lib/gcs-connector-hadoop3-2.2.19-shaded.jar https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v2.2.19/gcs-connector-hadoop3-2.2.19-shaded.jar + export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob) + + cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml + cp ./fixtures/hdfs/gcs-core-site.xml ${HADOOP_HOME}/etc/hadoop/core-site.xml + + cat << EOF >> $GITHUB_ENV + HADOOP_HOME=${HADOOP_HOME} + CLASSPATH=${CLASSPATH} + LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native + OPENDAL_HDFS_ROOT=/tmp/opendal/ + OPENDAL_HDFS_NAME_NODE=gs://bucket_name + OPENDAL_HDFS_ENABLE_APPEND=false + EOF diff --git a/core/src/services/hdfs/docs.md b/core/src/services/hdfs/docs.md index c9289159f5b9..7e4d88211281 100644 --- a/core/src/services/hdfs/docs.md +++ b/core/src/services/hdfs/docs.md @@ -121,6 +121,7 @@ async fn main() -> Result<()> { // Create fs backend builder. let mut builder = Hdfs::default(); // Set the name node for hdfs. + // If the string starts with a protocol type such as file://, hdfs://, or gs://, this protocol type will be used. builder.name_node("hdfs://127.0.0.1:9000"); // Set the root for hdfs, all operations will happen under this root. 
// diff --git a/fixtures/hdfs/gcs-core-site.xml b/fixtures/hdfs/gcs-core-site.xml new file mode 100644 index 000000000000..18c4516d601a --- /dev/null +++ b/fixtures/hdfs/gcs-core-site.xml @@ -0,0 +1,48 @@ + + + + + + + + + fs.AbstractFileSystem.gs.impl + com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS + The AbstractFileSystem for 'gs:' URIs. + + + fs.gs.project.id + + + Optional. Google Cloud Project ID with access to GCS buckets. + Required only for list buckets and create bucket operations. + + + + google.cloud.auth.type + SERVICE_ACCOUNT_JSON_KEYFILE + + Authentication type to use for GCS access. + + + + google.cloud.auth.service.account.json.keyfile + /path/to/json_key_file + + The JSON keyfile of the service account used for GCS + access when google.cloud.auth.type is SERVICE_ACCOUNT_JSON_KEYFILE. + + +