Skip to content

Commit

Permalink
[fix](hive)fix select count(*) hive full acid tb opt error. (apache#4…
Browse files Browse the repository at this point in the history
…6732)

Problem Summary:
Before PR: apache#44038
The previous PR optimized how splits are generated in the count( * )
scenario. However, that optimization mishandled Hive ACID tables.
This PR fixes the issue and adds tests.
In the count( * ) scenario, reads of a Hive full ACID table cannot be
optimized: its files must still be split, because merge-on-read is
required to apply delete deltas. A Hive insert-only ACID table, by
contrast, has no delete deltas and does not need its files split.
  • Loading branch information
hubgeter committed Jan 11, 2025
1 parent 347277f commit b0a1514
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 2 deletions.
1 change: 1 addition & 0 deletions be/src/vec/exec/format/table/transactional_hive_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
++num_delete_files;
}
if (num_delete_rows > 0) {
orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
orc_reader->set_delete_rows(&_delete_rows);
COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ private void getFileSplitByPartitions(HiveMetaStoreCache cache, List<HivePartiti
* we don't need to split the file because for parquet/orc format, only metadata is read.
* If we split the file, we will read metadata of a file multiple times, which is not efficient.
*
* - Hive Transactional Table may need merge on read, so do not apply this optimization.
* - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization.
* - If the file format is not parquet/orc, eg, text, we need to split the file to increase the parallelism.
*/
boolean needSplit = true;
if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT
&& hiveTransaction != null) {
&& !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) {
int totalFileNum = 0;
for (FileCacheValue fileCacheValue : fileCaches) {
if (fileCacheValue.getFiles() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,18 @@ F
-- !16 --
4 DD

-- !count_1 --
3

-- !count_2 --
6

-- !count_3 --
4

-- !count_4 --
3

-- !count_5 --
3

Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@
3 C
4 D
5 E

-- !count_1 --
4

-- !count_2 --
5

-- !count_3 --
5

Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock

}
}

// Regression checks for count(*) on Hive ACID tables: full ACID tables must
// still be read via merge-on-read (delete deltas applied), so the count must
// reflect deleted/updated rows; each qt_ tag is compared against the
// expected-output (.out) file. Trailing comments give the expected counts.
def test_acid_count = {
    qt_count_1 """ select count(*) from orc_full_acid; """ // expected 3
    qt_count_2 """ select count(*) from orc_full_acid_par; """ // expected 6 (partitioned)
    qt_count_3 """ select count(*) from orc_to_acid_compacted_tb; """ // expected 4 (converted then compacted)
    qt_count_4 """ select count(*) from orc_acid_minor; """ // expected 3 (after minor compaction)
    qt_count_5 """ select count(*) from orc_acid_major; """ // expected 3 (after major compaction)
}


String enabled = context.config.otherConfigs.get("enableHiveTest")
Expand Down Expand Up @@ -149,6 +157,10 @@ suite("test_transactional_hive", "p0,external,hive,external_docker,external_dock
test_acid()
test_acid_write()


test_acid_count()


sql """drop catalog if exists ${catalog_name}"""
} finally {
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ suite("test_hive_translation_insert_only", "p2,external,hive,external_remote,ext
qt_2 """ select * from parquet_insert_only_major order by id """
qt_3 """ select * from orc_insert_only_minor order by id """

qt_count_1 """ select count(*) from text_insert_only """ //4
qt_count_2 """ select count(*) from parquet_insert_only_major """ //5
qt_count_3 """ select count(*) from orc_insert_only_minor """ //5


sql """drop catalog ${hms_catalog_name};"""
}

Expand Down

0 comments on commit b0a1514

Please sign in to comment.