diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index 28805aae63c1e3..a73a2065d0ffaf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; +import org.apache.doris.common.util.FileFormatUtils; import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.FileSplit; @@ -247,8 +248,15 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { && !sessionVariable.isForceJniScanner() && hudiSplit.getHudiDeltaLogs().isEmpty()) { // no logs, is read optimize table, fallback to use native reader - // TODO: support read orc hudi table in native reader - rangeDesc.setFormatType(TFileFormatType.FORMAT_PARQUET); + String fileFormat = FileFormatUtils.getFileFormatBySuffix(hudiSplit.getDataFilePath()) + .orElse("Unknown"); + if (fileFormat.equals("parquet")) { + rangeDesc.setFormatType(TFileFormatType.FORMAT_PARQUET); + } else if (fileFormat.equals("orc")) { + rangeDesc.setFormatType(TFileFormatType.FORMAT_ORC); + } else { + throw new RuntimeException("Unsupported file format: " + fileFormat); + } } setHudiParams(rangeDesc, hudiSplit); } @@ -495,7 +503,7 @@ private HudiSplit generateHudiSplit(FileSlice fileSlice, List partitionV List logs = fileSlice.getLogFiles().map(HoodieLogFile::getPath) .map(StoragePath::toString) .collect(Collectors.toList()); - if (logs.isEmpty()) { + if (logs.isEmpty() && !sessionVariable.isForceJniScanner()) { noLogsSplitNum.incrementAndGet(); } diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_orc_tables.out 
b/regression-test/data/external_table_p2/hudi/test_hudi_orc_tables.out new file mode 100644 index 00000000000000..9e28074dc9114e --- /dev/null +++ b/regression-test/data/external_table_p2/hudi/test_hudi_orc_tables.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !cow -- +20241204190011744 20241204190011744_0_6 20241204190011744_0_0 a99e363a-6c10-40f3-a675-9117506d1a43-0_0-38-94_20241204190011744.orc 1 A +20241204190011744 20241204190011744_0_7 20241204190011744_2_0 a99e363a-6c10-40f3-a675-9117506d1a43-0_0-38-94_20241204190011744.orc 3 C +20241204190011744 20241204190011744_0_8 20241204190011744_4_0 a99e363a-6c10-40f3-a675-9117506d1a43-0_0-38-94_20241204190011744.orc 5 E +20241204190011744 20241204190011744_0_9 20241204190011744_1_0 a99e363a-6c10-40f3-a675-9117506d1a43-0_0-38-94_20241204190011744.orc 2 B +20241204190011744 20241204190011744_0_10 20241204190011744_3_0 a99e363a-6c10-40f3-a675-9117506d1a43-0_0-38-94_20241204190011744.orc 4 D + +-- !mor -- +20241204190002046 20241204190002046_0_11 20241204190002046_0_0 b1e68412-01d6-467f-b4c2-b4b18ec71346-0_0-30-75_20241204190002046.orc 1 A +20241204190002046 20241204190002046_0_12 20241204190002046_2_0 b1e68412-01d6-467f-b4c2-b4b18ec71346-0_0-30-75_20241204190002046.orc 3 C +20241204190002046 20241204190002046_0_13 20241204190002046_4_0 b1e68412-01d6-467f-b4c2-b4b18ec71346-0_0-30-75_20241204190002046.orc 5 E +20241204190002046 20241204190002046_0_14 20241204190002046_1_0 b1e68412-01d6-467f-b4c2-b4b18ec71346-0_0-30-75_20241204190002046.orc 2 B +20241204190002046 20241204190002046_0_15 20241204190002046_3_0 b1e68412-01d6-467f-b4c2-b4b18ec71346-0_0-30-75_20241204190002046.orc 4 D + diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_timestamp.out b/regression-test/data/external_table_p2/hudi/test_hudi_timestamp.out index dc47ff86d90a8d..9bdb0f7cb7285f 100644 --- a/regression-test/data/external_table_p2/hudi/test_hudi_timestamp.out +++ 
b/regression-test/data/external_table_p2/hudi/test_hudi_timestamp.out @@ -1,6 +1,31 @@ -- This file is automatically generated. You should know what you did if you want to edit this --- !timestamp -- +-- !timestamp1 -- 20241115015956800 20241115015956800_0_2 1 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 1 Alice 2024-10-25T08:00 -20241115015956800 20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-25T09:30:00 -20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-25T11:00:00 +20241115015956800 20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-25T09:30 +20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-25T11:00 + +-- !timestamp2 -- +20241115015956800 20241115015956800_0_2 1 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 1 Alice 2024-10-25T23:00 +20241115015956800 20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-26T00:30 +20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-26T02:00 + +-- !timestamp3 -- +20241115015956800 20241115015956800_0_2 1 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 1 Alice 2024-10-25T15:00 +20241115015956800 20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-25T16:30 +20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-25T18:00 + +-- !timestamp1 -- +20241115015956800 20241115015956800_0_2 1 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 1 Alice 2024-10-25T08:00 +20241115015956800 
20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-25T09:30 +20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-25T11:00 + +-- !timestamp2 -- +20241115015956800 20241115015956800_0_2 1 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 1 Alice 2024-10-25T23:00 +20241115015956800 20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-26T00:30 +20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-26T02:00 + +-- !timestamp3 -- +20241115015956800 20241115015956800_0_2 1 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 1 Alice 2024-10-25T15:00 +20241115015956800 20241115015956800_0_0 2 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 2 Bob 2024-10-25T16:30 +20241115015956800 20241115015956800_0_1 3 eec4913a-0d5f-4b8b-a0f5-934e252c2e45-0_0-7-14_20241115015956800.parquet 3 Charlie 2024-10-25T18:00 diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_orc_tables.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_orc_tables.groovy new file mode 100644 index 00000000000000..43638a23881e0e --- /dev/null +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_orc_tables.groovy @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite for ORC-backed Hudi tables: creates a catalog from the
+// `hudiEmrCatalog` config entry, selects all rows from the COW and MOR ORC
+// tables in `regression_hudi`, then drops the catalog again.
+suite("test_hudi_orc_tables", "p2,external,hudi,external_remote,external_remote_hudi") {
+    String enabled = context.config.otherConfigs.get("enableExternalHudiTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable hudi test")
+        // Skip the suite: every statement below needs the external Hudi
+        // catalog, so falling through would fail instead of being a no-op.
+        return
+    }
+
+    String catalog_name = "test_hudi_orc_tables"
+    String props = context.config.otherConfigs.get("hudiEmrCatalog")
+    sql """drop catalog if exists ${catalog_name};"""
+    sql """
+        create catalog if not exists ${catalog_name} properties (
+            ${props}
+        );
+    """
+
+    sql """ switch ${catalog_name};"""
+    sql """ use regression_hudi;"""
+    sql """ set enable_fallback_to_original_planner=false """
+
+    // Tagged queries; their results correspond to the `cow` and `mor`
+    // sections of test_hudi_orc_tables.out.
+    qt_cow """ select * from orc_hudi_table_cow; """
+    qt_mor """ select * from orc_hudi_table_mor; """
+
+    sql """drop catalog if exists ${catalog_name};"""
+}
\ No newline at end of file
diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_timestamp.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_timestamp.groovy index 36309322558f52..3d7bd40b2d54cf 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_timestamp.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_timestamp.groovy @@ -34,8 +34,22 @@ suite("test_hudi_timestamp", "p2,external,hudi,external_remote,external_remote_h sql """ use regression_hudi;""" sql """ set enable_fallback_to_original_planner=false """ - // TODO: fix hudi timezone issue and enable this - // qt_timestamp """ select * from hudi_table_with_timestamp order by id; """ + def 
test_timestamp_different_timezones = { + sql """set time_zone = 'America/Los_Angeles';""" + qt_timestamp1 """ select * from hudi_table_with_timestamp order by id; """ + sql """set time_zone = 'Asia/Shanghai';""" + qt_timestamp2 """ select * from hudi_table_with_timestamp order by id; """ + sql """set time_zone = 'UTC';""" + qt_timestamp3 """ select * from hudi_table_with_timestamp order by id; """ + } + + // test native reader + test_timestamp_different_timezones() + sql """ set force_jni_scanner = true; """ + // test jni reader + test_timestamp_different_timezones() + sql """ set force_jni_scanner = false; """ + sql """drop catalog if exists ${catalog_name};""" }