From 9d4cfb25408780c7944affea1d3ff51a0ec41b44 Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Wed, 16 Oct 2024 18:34:04 -0700 Subject: [PATCH] Remove config property hive.gcs.scheme (#11235) Summary: The GCS adapter expects two configs `hive.gcs.scheme` value for the scheme and `hive.gcs.endpoint` value without the scheme. However, in practice, the endpoint value contains the scheme. We can remove `hive.gcs.scheme` to be consistent. See Trino for example https://trino.io/docs/current/admin/fault-tolerant-execution.html#google-cloud-storage Pull Request resolved: https://github.com/facebookincubator/velox/pull/11235 Reviewed By: bikramSingh91 Differential Revision: D64335881 Pulled By: kgpai fbshipit-source-id: 47db7ed37e562ca14f5ff2dafa45f9b0813d63da --- velox/connectors/hive/HiveConfig.cpp | 4 ---- velox/connectors/hive/HiveConfig.h | 5 ---- .../storage_adapters/gcs/GCSFileSystem.cpp | 23 +++++++++++-------- .../connectors/hive/tests/HiveConfigTest.cpp | 4 ---- velox/docs/configs.rst | 4 ---- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp index 2e7046819e8a..837d18aeeb4b 100644 --- a/velox/connectors/hive/HiveConfig.cpp +++ b/velox/connectors/hive/HiveConfig.cpp @@ -139,10 +139,6 @@ std::string HiveConfig::gcsEndpoint() const { return config_->get(kGCSEndpoint, std::string("")); } -std::string HiveConfig::gcsScheme() const { - return config_->get(kGCSScheme, std::string("https")); -} - std::string HiveConfig::gcsCredentialsPath() const { return config_->get(kGCSCredentialsPath, std::string("")); } diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index 2888125b2167..800bf498ac0b 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -107,9 +107,6 @@ class HiveConfig { /// The GCS storage endpoint server. static constexpr const char* kGCSEndpoint = "hive.gcs.endpoint"; - /// The GCS storage scheme, https for default credentials. - static constexpr const char* kGCSScheme = "hive.gcs.scheme"; - /// The GCS service account configuration JSON key file. static constexpr const char* kGCSCredentialsPath = "hive.gcs.json-key-file-path"; @@ -296,8 +293,6 @@ class HiveConfig { std::string gcsEndpoint() const; - std::string gcsScheme() const; - std::string gcsCredentialsPath() const; std::optional gcsMaxRetryCount() const; diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp index 583a25c291e8..07d85f5746b5 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp @@ -266,13 +266,23 @@ class GCSFileSystem::Impl { // Use the input Config parameters and initialize the GCSClient. void initializeClient() { + constexpr std::string_view kHttpsScheme{"https://"}; auto options = gc::Options{}; - auto scheme = hiveConfig_->gcsScheme(); - if (scheme == "https") { + auto endpointOverride = hiveConfig_->gcsEndpoint(); + // Use secure credentials by default. + if (!endpointOverride.empty()) { + options.set(endpointOverride); + // Use Google default credentials if endpoint has https scheme. + if (endpointOverride.find(kHttpsScheme) == 0) { + options.set( + gc::MakeGoogleDefaultCredentials()); + } else { + options.set( + gc::MakeInsecureCredentials()); + } + } else { options.set( gc::MakeGoogleDefaultCredentials()); - } else { - options.set(gc::MakeInsecureCredentials()); } options.set(kUploadBufferSize); @@ -290,11 +300,6 @@ class GCSFileSystem::Impl { gcs::LimitedTimeRetryPolicy(retry_time).clone()); } - auto endpointOverride = hiveConfig_->gcsEndpoint(); - if (!endpointOverride.empty()) { - options.set(scheme + "://" + endpointOverride); - } - auto credFile = hiveConfig_->gcsCredentialsPath(); if (!credFile.empty() && std::filesystem::exists(credFile)) { std::ifstream jsonFile(credFile, std::ios::in); diff --git a/velox/connectors/hive/tests/HiveConfigTest.cpp b/velox/connectors/hive/tests/HiveConfigTest.cpp index 511191176612..875002dbd0b1 100644 --- a/velox/connectors/hive/tests/HiveConfigTest.cpp +++ b/velox/connectors/hive/tests/HiveConfigTest.cpp @@ -43,7 +43,6 @@ TEST(HiveConfigTest, defaultConfig) { ASSERT_EQ(hiveConfig.s3IAMRole(), std::nullopt); ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session"); ASSERT_EQ(hiveConfig.gcsEndpoint(), ""); - ASSERT_EQ(hiveConfig.gcsScheme(), "https"); ASSERT_EQ(hiveConfig.gcsCredentialsPath(), ""); ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), false); ASSERT_EQ( @@ -96,7 +95,6 @@ TEST(HiveConfigTest, overrideConfig) { {HiveConfig::kS3IamRole, "hello"}, {HiveConfig::kS3IamRoleSessionName, "velox"}, {HiveConfig::kGCSEndpoint, "hey"}, - {HiveConfig::kGCSScheme, "http"}, {HiveConfig::kGCSCredentialsPath, "hey"}, {HiveConfig::kOrcUseColumnNames, "true"}, {HiveConfig::kFileColumnNamesReadAsLowerCase, "true"}, @@ -136,7 +134,6 @@ TEST(HiveConfigTest, overrideConfig) { ASSERT_EQ(hiveConfig.s3IAMRole(), std::optional("hello")); ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox"); ASSERT_EQ(hiveConfig.gcsEndpoint(), "hey"); - ASSERT_EQ(hiveConfig.gcsScheme(), "http"); ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "hey"); ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), true); ASSERT_EQ( @@ -211,7 +208,6 @@ TEST(HiveConfigTest, overrideSession) { ASSERT_EQ(hiveConfig.s3IAMRole(), std::nullopt); ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session"); ASSERT_EQ(hiveConfig.gcsEndpoint(), ""); - ASSERT_EQ(hiveConfig.gcsScheme(), "https"); ASSERT_EQ(hiveConfig.gcsCredentialsPath(), ""); ASSERT_EQ(hiveConfig.isOrcUseColumnNames(session.get()), true); ASSERT_EQ(hiveConfig.isFileColumnNamesReadAsLowerCase(session.get()), true); diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index bfaffe622a19..6728ccf1d318 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -638,10 +638,6 @@ Each query can override the config by setting corresponding query session proper - string - - The GCS storage endpoint server. - * - hive.gcs.scheme - - string - - - - The GCS storage scheme, https for default credentials. * - hive.gcs.json-key-file-path - string -