From 0ded2d97b63e42da7e5ce900ee06920969b66d2f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 29 Nov 2024 19:26:12 +0800 Subject: [PATCH] branch-3.0: [test](mtmv)add create partition and partition scrolling (#42852) Cherry-picked from #42402 Co-authored-by: zfr95 <87513668+zfr9527@users.noreply.github.com> --- .../create_commit_mtmv_many_task.groovy | 200 ++++++++++++++ .../range_date_datetrunc_part_up.groovy | 142 ++++++++++ .../range_date_part_up.groovy | 207 ++++++++++++++ .../range_date_part_up_rewrite.groovy | 213 +++++++++++++++ .../range_datetime_part_up_rewrite.groovy | 213 +++++++++++++++ .../partition_curd_union_rewrite.groovy | 33 +-- .../partition_curd_union_rewrite_hive.groovy | 256 ++++++++++++++++++ 7 files changed, 1237 insertions(+), 27 deletions(-) create mode 100644 regression-test/suites/nereids_rules_p0/mv/create_part_and_up/create_commit_mtmv_many_task.groovy create mode 100644 regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_datetrunc_part_up.groovy create mode 100644 regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up.groovy create mode 100644 regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up_rewrite.groovy create mode 100644 regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy create mode 100644 regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite_hive.groovy diff --git a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/create_commit_mtmv_many_task.groovy b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/create_commit_mtmv_many_task.groovy new file mode 100644 index 00000000000000..b61918fe713ef3 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/create_commit_mtmv_many_task.groovy @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.time.LocalDate + +suite("create_commit_mtmv_many_tasks", "p2") { + + def src_database_name = context.config.getDbNameByFile(context.file) + sql """drop database if exists ${src_database_name};""" + sql """create database ${src_database_name};""" + sql """use ${src_database_name};""" + + def table_name1 = "lineitem" + def table_name2 = "orders" + sql """drop table if exists ${table_name1}""" + sql """drop table if exists ${table_name2}""" + sql """CREATE TABLE lineitem ( + l_orderkey bigint NOT NULL, + l_linenumber int not null, + l_partkey int NOT NULL, + l_suppkey int not null, + l_quantity decimal(15, 2) NOT NULL, + l_extendedprice decimal(15, 2) NOT NULL, + l_discount decimal(15, 2) NOT NULL, + l_tax decimal(15, 2) NOT NULL, + l_returnflag VARCHAR(1) NOT NULL, + l_linestatus VARCHAR(1) NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct VARCHAR(25) NOT NULL, + l_shipmode VARCHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL, + l_null VARCHAR(1) NULL + )ENGINE=OLAP + UNIQUE KEY(`l_orderkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders", + "enable_unique_key_merge_on_write" = "true" + );""" + sql """CREATE TABLE orders ( + o_orderkey bigint NOT NULL, + o_custkey int NOT NULL, + o_orderstatus VARCHAR(1) NOT NULL, + o_totalprice decimal(15, 2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority VARCHAR(15) NOT NULL, + o_clerk VARCHAR(15) NOT NULL, + o_shippriority int NOT NULL, + o_comment VARCHAR(79) NOT NULL, + o_null VARCHAR(1) NULL + )ENGINE=OLAP + UNIQUE KEY(`o_orderkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders", + "enable_unique_key_merge_on_write" = "false" + );""" + + def stream_load_job = { table_name, src_file_name -> + streamLoad { + table table_name + set 'column_separator', '|' + file """${getS3Url() + '/regression/tpch/sf1/'}${src_file_name}""" + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + + } + sql "select count(*) from ${table_name}" + } + + for (int i = 1; i <= 10; i++) { + stream_load_job(table_name1, "lineitem.tbl.${i}") + stream_load_job(table_name2, "orders.tbl.${i}") + } + + def dst_database_name = "wz_tpch_mtmv_hit_property" + sql """drop database if exists ${dst_database_name};""" + sql """create database ${dst_database_name};""" + sql """use ${dst_database_name};""" + + sql """drop table if exists ${table_name1}""" + sql """drop table if exists ${table_name2}""" + sql """CREATE TABLE `lineitem` ( + `l_orderkey` BIGINT NOT NULL, + `l_linenumber` INT NOT NULL, + `l_partkey` INT NOT NULL, + `l_suppkey` INT NOT NULL, + `l_quantity` DECIMAL(15, 2) NOT NULL, + `l_extendedprice` DECIMAL(15, 2) NOT NULL, + `l_discount` DECIMAL(15, 2) NOT NULL, + `l_tax` DECIMAL(15, 2) NOT NULL, + `l_returnflag` VARCHAR(1) NOT NULL, + `l_linestatus` VARCHAR(1) NOT NULL, + `l_commitdate` DATE NOT NULL, + `l_receiptdate` DATE NOT NULL, + `l_shipinstruct` VARCHAR(25) NOT NULL, + `l_shipmode` VARCHAR(10) NOT NULL, + `l_comment` VARCHAR(44) NOT NULL, + `l_shipdate` DATE NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey ) + COMMENT 'OLAP' + AUTO PARTITION BY range(date_trunc(`l_shipdate`, 'day')) () + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + sql """CREATE TABLE `orders` ( + `o_orderkey` BIGINT NOT NULL, + `o_custkey` INT NOT NULL, + `o_orderstatus` VARCHAR(1) NOT NULL, + `o_totalprice` DECIMAL(15, 2) NOT NULL, + `o_orderpriority` VARCHAR(15) NOT NULL, + `o_clerk` VARCHAR(15) NOT NULL, + `o_shippriority` INT NOT NULL, + `o_comment` VARCHAR(79) NOT NULL, + `o_orderdate` DATE NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`o_orderkey`, `o_custkey`) + COMMENT 'OLAP' + AUTO PARTITION BY range(date_trunc(`o_orderdate`, 'day')) () + DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + sql """drop MATERIALIZED VIEW if exists mv1;""" + sql """ + CREATE MATERIALIZED VIEW mv1 + BUILD IMMEDIATE REFRESH ON COMMIT + partition by(l_shipdate) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') AS + select l_shipdate, l_orderkey from lineitem as t1 left join orders as t2 on t1.l_orderkey = t2.o_orderkey group by l_shipdate, l_orderkey; + """ + + def insert_into_select = { date_it -> + sql """INSERT INTO ${dst_database_name}.${table_name1} + SELECT l_orderkey, l_linenumber, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, '${date_it}' AS new_date_column + FROM ${src_database_name}.${table_name1};""" + + sql """INSERT INTO ${dst_database_name}.${table_name2} + SELECT o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderpriority, o_clerk, o_shippriority, o_comment, '${date_it}' AS new_date_column + FROM ${src_database_name}.${table_name2}""" + } + + def get_next_day = { def date_it -> + def date = LocalDate.parse(date_it) + def next_day = date.plusDays(1) + return next_day + } + + def start_date = "2023-12-01" + while (true) { + if (start_date.toString() == "2024-03-11") { + break + } + logger.info("task load start") + insert_into_select(start_date) + start_date = get_next_day(start_date.toString()) + } + + def job_name = getJobName(dst_database_name, "mv1") + waitingMTMVTaskFinished(job_name) + def task_num = sql """select count(*) from tasks("type"="mv") where JobName="${job_name}";""" + assertTrue(task_num[0][0] < 100) + + def mv_row_count = sql """select count(1) from mv1;""" + def real_row_count = sql """select count(1) from (select l_shipdate, l_orderkey from lineitem as t1 left join orders as t2 on t1.l_orderkey = t2.o_orderkey group by l_shipdate, l_orderkey) t;""" + assertTrue(mv_row_count[0][0] == real_row_count[0][0]) + +} diff --git a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_datetrunc_part_up.groovy b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_datetrunc_part_up.groovy new file mode 100644 index 00000000000000..c86f5d6a141f7f --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_datetrunc_part_up.groovy @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("mtmv_range_date_datetrunc_date_part_up") { + + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_nereids_timeout = false" + String mv_prefix = "range_datetrunc_date_up" + String tb_name = mv_prefix + "_tb" + String mv_name = mv_prefix + "_mv" + + sql """ + drop table if exists ${tb_name} + """ + + sql """CREATE TABLE `${tb_name}` ( + `l_orderkey` BIGINT NULL, + `l_linenumber` INT NULL, + `l_partkey` INT NULL, + `l_suppkey` INT NULL, + `l_quantity` DECIMAL(15, 2) NULL, + `l_extendedprice` DECIMAL(15, 2) NULL, + `l_discount` DECIMAL(15, 2) NULL, + `l_tax` DECIMAL(15, 2) NULL, + `l_returnflag` VARCHAR(1) NULL, + `l_linestatus` VARCHAR(1) NULL, + `l_commitdate` DATE NULL, + `l_receiptdate` DATE NULL, + `l_shipinstruct` VARCHAR(25) NULL, + `l_shipmode` VARCHAR(10) NULL, + `l_comment` VARCHAR(44) NULL, + `l_shipdate` DATEtime not NULL + ) ENGINE=OLAP + DUPLICATE KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey ) + COMMENT 'OLAP' + partition by range (`l_shipdate`) ( + partition p1 values [("2023-10-29 00:00:00"), ("2023-10-29 01:00:00")), + partition p2 values [("2023-10-29 01:00:00"), ("2023-10-29 02:00:00")), + partition p3 values [("2023-10-29 02:00:00"), ("2023-10-29 03:00:00")), + partition p4 values [("2023-10-29 03:00:00"), ("2023-10-29 04:00:00")), + partition p5 values [("2023-10-29 04:00:00"), ("2023-10-29 05:00:00")) + ) + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + insert into ${tb_name} values + (null, 1, 2, 3, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29 00:00:00'), + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-29 01:00:00'), + (3, 3, null, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx', '2023-10-29 02:00:00'), + (1, 2, 3, null, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29 03:00:00'), + (2, 3, 2, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-29 04:00:00'); + """ + + def compare_res = { def stmt -> + sql "SET enable_materialized_view_rewrite=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "SET enable_materialized_view_rewrite=true" + def mv_origin_res = sql stmt + logger.info("mv_origin_res: " + mv_origin_res) + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + } + + def create_mv = { cur_mv_name, mv_sql, col_name, date_trunc_range -> + sql """DROP MATERIALIZED VIEW IF EXISTS ${cur_mv_name};""" + sql """DROP TABLE IF EXISTS ${cur_mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${cur_mv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(date_trunc(`${col_name}`, '${date_trunc_range}')) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + ${mv_sql} + """ + } + + def select_list1_1 = "l_shipdate" + def select_list1_2 = "date_trunc(`l_shipdate`, 'day') as col1" + def select_list1_3 = "DATE_FORMAT(`l_shipdate`, '%Y-%m-%d')" + def select_list2_1 = "date_trunc(`l_shipdate`, 'day') as col1, l_shipdate" + def select_list2_2 = "date_trunc(`l_shipdate`, 'day') as col1, DATE_FORMAT(`l_shipdate`, '%Y-%m-%d')" + def select_list2_3 = "l_shipdate, DATE_FORMAT(`l_shipdate`, '%Y-%m-%d')" + def select_list3_1 = "date_trunc(`l_shipdate`, 'day') as col1, l_shipdate, DATE_FORMAT(`l_shipdate`, '%Y-%m-%d')" + def select_list3_2 = "date_trunc(`l_shipdate`, 'day') as col1, DATE_FORMAT(`l_shipdate`, '%Y-%m-%d'), l_shipdate" + def select_list3_3 = "l_shipdate, DATE_FORMAT(`l_shipdate`, '%Y-%m-%d'), date_trunc(`l_shipdate`, 'day') as col1" + + def select_lists = [select_list1_1, select_list1_2, select_list1_3, select_list2_1, select_list2_2, + select_list2_3, select_list3_1, select_list3_2, select_list3_3] + for (int i = 0; i < select_lists.size(); i++) { + for (int j = 0; j < select_lists.size(); j++) { + if (i == j || j > 5) { + def group_by_str = select_lists[j].replaceAll("as col1", "") + def str = "select " + select_lists[i] + " from ${tb_name} group by " + group_by_str + sql str + + if (select_lists[i].replaceAll("`l_shipdate`", "").indexOf("l_shipdate") != -1) { + create_mv(mv_name, str, "l_shipdate", "day") + waitingMTMVTaskFinishedByMvName(mv_name) + mv_rewrite_success(str, mv_name) + compare_res(str + " order by 1,2,3") + } + + if (select_lists[i].indexOf("col1") != -1) { + create_mv(mv_name, str, "col1", "day") + waitingMTMVTaskFinishedByMvName(mv_name) + mv_rewrite_success(str, mv_name) + compare_res(str + " order by 1,2,3") + } + + } + } + } + +} diff --git a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up.groovy b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up.groovy new file mode 100644 index 00000000000000..5be6b1afdcdc44 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up.groovy @@ -0,0 +1,207 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("mtmv_range_date_part_up") { + + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_nereids_timeout = false" + String mv_prefix = "range_date_up" + + sql """ + drop table if exists lineitem_range_date + """ + + sql """CREATE TABLE `lineitem_range_date` ( + `l_orderkey` BIGINT NULL, + `l_linenumber` INT NULL, + `l_partkey` INT NULL, + `l_suppkey` INT NULL, + `l_quantity` DECIMAL(15, 2) NULL, + `l_extendedprice` DECIMAL(15, 2) NULL, + `l_discount` DECIMAL(15, 2) NULL, + `l_tax` DECIMAL(15, 2) NULL, + `l_returnflag` VARCHAR(1) NULL, + `l_linestatus` VARCHAR(1) NULL, + `l_commitdate` DATE NULL, + `l_receiptdate` DATE NULL, + `l_shipinstruct` VARCHAR(25) NULL, + `l_shipmode` VARCHAR(10) NULL, + `l_comment` VARCHAR(44) NULL, + `l_shipdate` DATE not NULL + ) ENGINE=OLAP + DUPLICATE KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey ) + COMMENT 'OLAP' + partition by range (`l_shipdate`) ( + partition p1 values [("2023-10-29"), ("2023-10-30")), + partition p2 values [("2023-10-30"), ("2023-10-31")), + partition p3 values [("2023-10-31"), ("2023-11-01"))) + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv1;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv1 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(col1) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select l_shipdate as col1 from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv2_1;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv2_1 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(col1) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'day') as col1 from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv2_2;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv2_2 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(l_shipdate) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'day') as col1, l_shipdate from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv3;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv3 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`l_shipdate`, 'day')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select l_shipdate from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv4_1;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv4_1 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`l_shipdate`, 'day')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'day') as col1, l_shipdate from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv4_2;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv4_2 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'day')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'day') as col1 from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv5;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv5 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(col1) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'month') as col1 from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv6;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv6 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`l_shipdate`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select l_shipdate from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv7_1;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv7_1 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'year')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'month') as col1 from lineitem_range_date;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv7_2;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv7_2 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`l_shipdate`, 'year')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'month') as col1, l_shipdate from lineitem_range_date;""" + + // don't create + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv8;""" + try { + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv8 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'year') as col1, l_shipdate from lineitem_range_date;""" + } catch (Exception e) { + log.info(e.getMessage()) + assertTrue(e.getMessage().contains("Unable to find a suitable base table for partitioning")) + } + + sql """ + insert into lineitem_range_date values + (null, 1, 2, 3, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29'), + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-29'), + (3, 3, null, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx', '2023-10-31'), + (1, 2, 3, null, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29'), + (2, 3, 2, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-30'), + (3, 1, 1, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', null, 'c', 'd', 'xxxxxxxxx', '2023-10-31'), + (1, 3, 2, 2, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29'); + """ + + def get_part = { def mv_name -> + def part_res = sql """show partitions from ${mv_name}""" + return part_res.size() + } + + def localWaitingMTMVTaskFinished = { def jobName -> + Thread.sleep(2000); + String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where JobName = '${jobName}' order by CreateTime ASC" + String status = "NULL" + List> result + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + result = sql(showTasks) + logger.info("result: " + result.toString()) + if (!result.isEmpty()) { + status = result.last().get(4) + } + logger.info("The state of ${showTasks} is ${status}") + Thread.sleep(1000); + } while (timeoutTimestamp > System.currentTimeMillis() && (status == 'PENDING' || status == 'RUNNING' || status == 'NULL')) + if (status != "SUCCESS") { + logger.info("status is not success") + } + } + + def mv_name_list = ["${mv_prefix}_mv1", "${mv_prefix}_mv2_1", "${mv_prefix}_mv2_2", "${mv_prefix}_mv3", "${mv_prefix}_mv4_1", "${mv_prefix}_mv4_2", "${mv_prefix}_mv5", "${mv_prefix}_mv6", "${mv_prefix}_mv7_1", "${mv_prefix}_mv7_2"] + def mv_part = [3, 3, 3, 3, 3, 3, 1, 1, 1, 1] + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + def job_name = getJobName(db, mv_name_list[i]) + waitingMTMVTaskFinished(job_name) + assertEquals(get_part(mv_name_list[i]), mv_part[i]) + } + + sql """alter table lineitem_range_date add partition p4 values [("2023-11-01"), ("2023-11-02"));""" + sql """insert into lineitem_range_date values + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-01')""" + + mv_part = [4, 4, 4, 4, 4, 4, 2, 2, 1, 1] + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + def job_name = getJobName(db, mv_name_list[i]) + waitingMTMVTaskFinished(job_name) + assertEquals(get_part(mv_name_list[i]), mv_part[i]) + } + + sql """alter table lineitem_range_date add partition p5 values [("2023-11-02"), ("2023-12-02"));""" + sql """insert into lineitem_range_date values + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-02')""" + + mv_part = [5, -1, 5, -1, -1, -1, -1, -1, 1, 1] + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + def job_name = getJobName(db, mv_name_list[i]) + if (i in [1, 3, 4, 5, 6, 7]) { + localWaitingMTMVTaskFinished(job_name) + def mv_task = sql "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where JobName = '${job_name}' order by CreateTime DESC" + logger.info("mv_task: " + mv_task) + assertEquals("FAILED", mv_task[0][4]) + } else { + waitingMTMVTaskFinished(job_name) + assertEquals(get_part(mv_name_list[i]), mv_part[i]) + } + } + + sql """alter table lineitem_range_date add partition p6 values [("2023-12-02"), ("2024-12-02"));""" + sql """insert into lineitem_range_date values + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2024-12-01')""" + + mv_part = [6, -1, 6, -1, -1, -1, -1, -1, -1, -1] + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + if (i in [1, 3, 4, 5, 6, 7, 8, 9]) { + def job_name = getJobName(db, mv_name_list[i]) + localWaitingMTMVTaskFinished(job_name) + def mv_task = sql "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where JobName = '${job_name}' order by CreateTime DESC" + assertEquals("FAILED", mv_task[0][4]) + } else { + def job_name = getJobName(db, mv_name_list[i]) + waitingMTMVTaskFinished(job_name) + assertEquals(get_part(mv_name_list[i]), mv_part[i]) + } + } + +} diff --git a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up_rewrite.groovy new file mode 100644 index 00000000000000..8bf629d93841fb --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_date_part_up_rewrite.groovy @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("mtmv_range_date_part_up_rewrite") { + + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_materialized_view_nest_rewrite=true" + sql "SET enable_materialized_view_union_rewrite=true" + sql "SET enable_nereids_timeout = false" + String mv_prefix = "range_date_up_union" + + sql """ + drop table if exists lineitem_range_date_union + """ + + sql """CREATE TABLE `lineitem_range_date_union` ( + `l_orderkey` BIGINT NULL, + `l_linenumber` INT NULL, + `l_partkey` INT NULL, + `l_suppkey` INT NULL, + `l_quantity` DECIMAL(15, 2) NULL, + `l_extendedprice` DECIMAL(15, 2) NULL, + `l_discount` DECIMAL(15, 2) NULL, + `l_tax` DECIMAL(15, 2) NULL, + `l_returnflag` VARCHAR(1) NULL, + `l_linestatus` VARCHAR(1) NULL, + `l_commitdate` DATE NULL, + `l_receiptdate` DATE NULL, + `l_shipinstruct` VARCHAR(25) NULL, + `l_shipmode` VARCHAR(10) NULL, + `l_comment` VARCHAR(44) NULL, + `l_shipdate` DATE not NULL + ) ENGINE=OLAP + DUPLICATE KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey ) + COMMENT 'OLAP' + partition by range (`l_shipdate`) ( + partition p1 values [("2023-10-29"), ("2023-10-30")), + partition p2 values [("2023-10-30"), ("2023-10-31")), + partition p3 values [("2023-10-31"), ("2023-11-01"))) + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + drop table if exists orders_range_date_union + """ + + sql """CREATE TABLE `orders_range_date_union` ( + `o_orderkey` BIGINT NULL, + `o_custkey` INT NULL, + `o_orderstatus` VARCHAR(1) NULL, + `o_totalprice` DECIMAL(15, 2) NULL, + `o_orderpriority` VARCHAR(15) NULL, + `o_clerk` VARCHAR(15) NULL, + `o_shippriority` INT NULL, + `o_comment` VARCHAR(79) NULL, + `o_orderdate` DATE not NULL + ) ENGINE=OLAP + DUPLICATE KEY(`o_orderkey`, `o_custkey`) + COMMENT 'OLAP' + partition by range (`o_orderdate`) ( + partition p1 values [("2023-10-29"), ("2023-10-30")), + partition p2 values [("2023-10-30"), ("2023-10-31")), + partition p3 values [("2023-10-31"), ("2023-11-01")), + partition p4 values [("2023-11-01"), ("2023-11-02")), + partition p5 values [("2023-11-02"), ("2023-11-03"))) + DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + insert into lineitem_range_date_union values + (null, 1, 2, 3, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29'), + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-29'), + (3, 3, null, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx', '2023-10-31'), + (1, 2, 3, null, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29'), + (2, 3, 2, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-30'), + (3, 1, 1, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', null, 'c', 'd', 'xxxxxxxxx', '2023-10-31'), + (1, 3, 2, 2, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29'); + """ + + sql """ + insert into orders_range_date_union values + (null, 1, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-29'), + (1, null, 'o', 109.2, 'c','d',2, 'mm', '2023-10-29'), + (3, 3, null, 99.5, 'a', 'b', 1, 'yy', '2023-10-30'), + (1, 2, 'o', null, 'a', 'b', 1, 'yy', '2023-11-01'), + (2, 3, 'k', 109.2, null,'d',2, 'mm', '2023-11-02'), + (3, 1, 'k', 99.5, 'a', null, 1, 'yy', '2023-11-02'), + (1, 3, 'o', 99.5, 'a', 'b', null, 'yy', '2023-10-31'), + (2, 1, 'o', 109.2, 'c','d',2, null, '2023-10-30'), + (3, 2, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-29'), + (4, 5, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-31'); + """ + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv1;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv1 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'day') as col1, l_shipdate, l_orderkey from lineitem_range_date_union as t1 left join orders_range_date_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv2;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv2 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'hour') as col1, l_shipdate, l_orderkey from lineitem_range_date_union as t1 left join orders_range_date_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey;""" + + def sql1 = """select date_trunc(`l_shipdate`, 'day') as col1, l_shipdate, l_orderkey from lineitem_range_date_union as t1 left join orders_range_date_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey""" + def sql2 = """select date_trunc(`l_shipdate`, 'hour') as col1, l_shipdate, l_orderkey from lineitem_range_date_union as t1 left join orders_range_date_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey""" + + def localWaitingMTMVTaskFinished = { def jobName -> + Thread.sleep(2000); + String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where JobName = '${jobName}' order by CreateTime ASC" + String status = "NULL" + List> result + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + result = sql(showTasks) + logger.info("result: " + result.toString()) + if (!result.isEmpty()) { + status = result.last().get(4) + } + logger.info("The state of ${showTasks} is ${status}") + Thread.sleep(1000); + } while (timeoutTimestamp > System.currentTimeMillis() && (status == 'PENDING' || status == 'RUNNING' || status == 'NULL')) + if (status != "SUCCESS") { + logger.info("status is not success") + } + } + + def compare_res = { def stmt -> + sql "SET enable_materialized_view_rewrite=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "SET enable_materialized_view_rewrite=true" + def mv_origin_res = sql stmt + logger.info("mv_origin_res: " + mv_origin_res) + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + } + + def query_stmt_list = [sql1, sql2] + def mv_name_list = ["${mv_prefix}_mv1", "${mv_prefix}_mv2"] + for (int i = 0; i < mv_name_list.size(); i++) { + def job_name = getJobName(db, mv_name_list[i]) + waitingMTMVTaskFinished(job_name) + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + + sql """alter table lineitem_range_date_union add partition p4 values [("2023-11-01"), ("2023-11-02"));""" + sql """insert into lineitem_range_date_union values + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-01')""" + for (int i = 0; i < mv_name_list.size(); i++) { + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + sql """insert into lineitem_range_date_union values + (2, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-01');""" + for (int i = 0; i < mv_name_list.size(); i++) { + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + sql """ALTER TABLE lineitem_range_date_union DROP PARTITION IF EXISTS p4 FORCE""" + for (int i = 0; i < mv_name_list.size(); i++) { + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + +} diff --git a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy new file mode 100644 index 00000000000000..b7e9562249d789 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("mtmv_range_datetime_part_up_rewrite") { + + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "SET enable_materialized_view_rewrite=true" + sql "SET enable_materialized_view_nest_rewrite=true" + sql "SET enable_materialized_view_union_rewrite=true" + sql "SET enable_nereids_timeout = false" + String mv_prefix = "range_datetime_up_union" + + sql """ + drop table if exists lineitem_range_datetime_union + """ + + sql """CREATE TABLE `lineitem_range_datetime_union` ( + `l_orderkey` BIGINT NULL, + `l_linenumber` INT NULL, + `l_partkey` INT NULL, + `l_suppkey` INT NULL, + `l_quantity` DECIMAL(15, 2) NULL, + `l_extendedprice` DECIMAL(15, 2) NULL, + `l_discount` DECIMAL(15, 2) NULL, + `l_tax` DECIMAL(15, 2) NULL, + `l_returnflag` VARCHAR(1) NULL, + `l_linestatus` VARCHAR(1) NULL, + `l_commitdate` DATE NULL, + `l_receiptdate` DATE NULL, + `l_shipinstruct` VARCHAR(25) NULL, + `l_shipmode` VARCHAR(10) NULL, + `l_comment` VARCHAR(44) NULL, + `l_shipdate` DATEtime not NULL + ) ENGINE=OLAP + DUPLICATE KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey ) + COMMENT 'OLAP' + partition by range (`l_shipdate`) ( + partition p1 values [("2023-10-29 00:00:00"), ("2023-10-29 01:00:00")), + partition p2 values [("2023-10-29 01:00:00"), ("2023-10-29 02:00:00")), + partition p3 values [("2023-10-29 02:00:00"), ("2023-10-29 03:00:00"))) + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + drop table if exists orders_range_datetime_union + """ + + sql """CREATE TABLE `orders_range_datetime_union` ( + `o_orderkey` BIGINT NULL, + `o_custkey` INT NULL, + `o_orderstatus` VARCHAR(1) NULL, + `o_totalprice` DECIMAL(15, 2) NULL, + `o_orderpriority` VARCHAR(15) NULL, + `o_clerk` VARCHAR(15) NULL, + `o_shippriority` INT NULL, + `o_comment` VARCHAR(79) NULL, + `o_orderdate` DATEtime not NULL + ) ENGINE=OLAP + DUPLICATE KEY(`o_orderkey`, `o_custkey`) + COMMENT 'OLAP' + partition by range (`o_orderdate`) ( + partition p1 values [("2023-10-29 00:00:00"), ("2023-10-29 01:00:00")), + partition p2 values [("2023-10-29 01:00:00"), ("2023-10-29 02:00:00")), + partition p3 values [("2023-10-29 02:00:00"), ("2023-10-29 03:00:00")), + partition p4 values [("2023-10-29 03:00:00"), ("2023-10-29 04:00:00")), + partition p5 values [("2023-10-29 04:00:00"), ("2023-10-29 05:00:00"))) + DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + insert into lineitem_range_datetime_union values + (null, 1, 2, 3, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29 00:00:00'), + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-29 00:00:00'), + (3, 3, null, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx', '2023-10-29 02:00:00'), + (1, 2, 3, null, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29 00:00:00'), + (2, 3, 2, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-29 01:00:00'), + (3, 1, 1, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', null, 'c', 'd', 'xxxxxxxxx', '2023-10-29 02:00:00'), + (1, 3, 2, 2, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-29 00:00:00'); + """ + + sql """ + insert into orders_range_datetime_union values + (null, 1, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-29 00:00:00'), + (1, null, 'o', 109.2, 'c','d',2, 'mm', '2023-10-29 00:00:00'), + (3, 3, null, 99.5, 'a', 'b', 1, 'yy', '2023-10-29 01:00:00'), + (1, 2, 'o', null, 'a', 'b', 1, 'yy', '2023-10-29 03:00:00'), + (2, 3, 'k', 109.2, null,'d',2, 'mm', '2023-10-29 04:00:00'), + (3, 1, 'k', 99.5, 'a', null, 1, 'yy', '2023-10-29 04:00:00'), + (1, 3, 'o', 99.5, 'a', 'b', null, 'yy', '2023-10-29 02:00:00'), + (2, 1, 'o', 109.2, 'c','d',2, null, '2023-10-29 01:00:00'), + (3, 2, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-29 00:00:00'), + (4, 5, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-29 02:00:00'); + """ + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv1;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv1 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'day') as col1, l_shipdate, l_orderkey from lineitem_range_datetime_union as t1 left join orders_range_datetime_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey;""" + + sql """DROP MATERIALIZED VIEW if exists ${mv_prefix}_mv2;""" + sql """CREATE MATERIALIZED VIEW ${mv_prefix}_mv2 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 PROPERTIES ('replication_num' = '1') AS + select date_trunc(`l_shipdate`, 'hour') as col1, l_shipdate, l_orderkey from lineitem_range_datetime_union as t1 left join orders_range_datetime_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey;""" + + def sql1 = """select date_trunc(`l_shipdate`, 'day') as col1, l_shipdate, l_orderkey from lineitem_range_datetime_union as t1 left join orders_range_datetime_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey""" + def sql2 = """select date_trunc(`l_shipdate`, 'hour') as col1, l_shipdate, l_orderkey from lineitem_range_datetime_union as t1 left join orders_range_datetime_union as t2 on t1.l_orderkey = t2.o_orderkey group by col1, l_shipdate, l_orderkey""" + + def localWaitingMTMVTaskFinished = { def jobName -> + Thread.sleep(2000); + String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where JobName = '${jobName}' order by CreateTime ASC" + String status = "NULL" + List> result + long startTime = System.currentTimeMillis() + long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min + do { + result = sql(showTasks) + logger.info("result: " + result.toString()) + if (!result.isEmpty()) { + status = result.last().get(4) + } + logger.info("The state of ${showTasks} is ${status}") + Thread.sleep(1000); + } while (timeoutTimestamp > System.currentTimeMillis() && (status == 'PENDING' || status == 'RUNNING' || status == 'NULL')) + if (status != "SUCCESS") { + logger.info("status is not success") + } + } + + def compare_res = { def stmt -> + sql "SET enable_materialized_view_rewrite=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "SET enable_materialized_view_rewrite=true" + def mv_origin_res = sql stmt + logger.info("mv_origin_res: " + mv_origin_res) + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + } + + def query_stmt_list = [sql1, sql2] + def mv_name_list = ["${mv_prefix}_mv1", "${mv_prefix}_mv2"] + for (int i = 0; i < mv_name_list.size(); i++) { + def job_name = getJobName(db, mv_name_list[i]) + waitingMTMVTaskFinished(job_name) + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + + sql """alter table lineitem_range_datetime_union add partition p4 values [("2023-11-29 03:00:00"), ("2023-11-29 04:00:00"));""" + sql """insert into lineitem_range_datetime_union values + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-29 03:00:00')""" + for (int i = 0; i < mv_name_list.size(); i++) { + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + sql """insert into lineitem_range_datetime_union values + (3, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-29 03:00:00');""" + for (int i = 0; i < mv_name_list.size(); i++) { + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + sql """ALTER TABLE lineitem_range_datetime_union DROP PARTITION IF EXISTS p4 FORCE""" + for (int i = 0; i < mv_name_list.size(); i++) { + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + + for (int i = 0; i < mv_name_list.size(); i++) { + sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" + mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + compare_res(query_stmt_list[i] + " order by 1,2,3") + } + +} diff --git a/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite.groovy index acefb10021833a..4f6717dfe59255 100644 --- a/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite.groovy @@ -156,7 +156,6 @@ suite ("partition_curd_union_rewrite") { ${mv_def_sql} """ - def compare_res = { def stmt -> sql "SET enable_materialized_view_rewrite=false" def origin_res = sql stmt @@ -186,7 +185,6 @@ suite ("partition_curd_union_rewrite") { is_partition_statistics_ready(db, ["lineitem", "orders", mv_name])) compare_res(partition_sql + order_by_stmt) - /* // Part partition is invalid, test can not use partition 2023-10-17 to rewrite sql """ insert into lineitem values @@ -194,15 +192,9 @@ suite ("partition_curd_union_rewrite") { """ // wait partition is invalid sleep(5000) - explain { - sql("${all_partition_sql}") - contains("${mv_name}(${mv_name})") - } + mv_rewrite_success(all_partition_sql, mv_name) compare_res(all_partition_sql + order_by_stmt) - explain { - sql("${partition_sql}") - contains("${mv_name}(${mv_name})") - } + mv_rewrite_success(partition_sql, mv_name) compare_res(partition_sql + order_by_stmt) sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" @@ -214,15 +206,9 @@ suite ("partition_curd_union_rewrite") { """ // Wait partition is invalid sleep(5000) - explain { - sql("${all_partition_sql}") - contains("${mv_name}(${mv_name})") - } + mv_rewrite_success(all_partition_sql, mv_name) compare_res(all_partition_sql + order_by_stmt) - explain { - sql("${partition_sql}") - contains("${mv_name}(${mv_name})") - } + mv_rewrite_success(partition_sql, mv_name) compare_res(partition_sql + order_by_stmt) // Test when base table delete partition test @@ -232,15 +218,8 @@ suite ("partition_curd_union_rewrite") { """ // Wait partition is invalid sleep(3000) - explain { - sql("${all_partition_sql}") - contains("${mv_name}(${mv_name})") - } + mv_rewrite_success(all_partition_sql, mv_name) compare_res(all_partition_sql + order_by_stmt) - explain { - sql("${partition_sql}") - contains("${mv_name}(${mv_name})") - } + mv_rewrite_success(partition_sql, mv_name) compare_res(partition_sql + order_by_stmt) - */ } diff --git a/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite_hive.groovy b/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite_hive.groovy new file mode 100644 index 00000000000000..14170bf21f40fa --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/union_rewrite/partition_curd_union_rewrite_hive.groovy @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite ("partition_curd_union_rewrite_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return + } + + sql """SET materialized_view_rewrite_enable_contain_external_table = true;""" + + def create_mv = { mv_name, mv_sql -> + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name};""" + sql """DROP TABLE IF EXISTS ${mv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(l_shipdate) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + ${mv_sql} + """ + } + def compare_res = { def stmt -> + def mark = true + sql "SET materialized_view_rewrite_enable_contain_external_table=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "SET materialized_view_rewrite_enable_contain_external_table=true" + def mv_origin_res = sql stmt + logger.info("mv_origin_res: " + mv_origin_res) + if (!((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size()))) { + mark = false + return mark + } + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + if (!(mv_origin_res[row].size() == origin_res[row].size())) { + mark = false + return mark + } + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + if (!(mv_origin_res[row][col] == origin_res[row][col])) { + mark = false + return mark + } + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + return mark + } + + + String db = context.config.getDbNameByFile(context.file) + String ctl = "partition_curd_union_hive" + for (String hivePrefix : ["hive2", "hive3"]) { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + String catalog_name = ctl + "_" + hivePrefix + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', + 'use_meta_cache' = 'true' + );""" + sql """switch ${catalog_name}""" + sql """create database if not exists ${db}""" + sql """use `${db}`""" + String orders_tb_name = catalog_name + "_orders" + String lineitem_tb_name = catalog_name + "_lineitem" + def mv_name = catalog_name + "_test_mv" + + sql """drop table if exists ${orders_tb_name}""" + sql """CREATE TABLE IF NOT EXISTS ${orders_tb_name} ( + o_orderkey int, + o_custkey int, + o_orderstatus VARCHAR(1), + o_totalprice DECIMAL(15, 2), + o_orderpriority VARCHAR(15), + o_clerk VARCHAR(15), + o_shippriority int, + o_comment VARCHAR(15), + o_orderdate date + ) + ENGINE=hive + PARTITION BY LIST (`o_orderdate`) () + PROPERTIES ( + "replication_num" = "1", + 'file_format'='orc' + ); + """ + + sql """ + insert into ${orders_tb_name} values + (1, 1, 'o', 99.5, 'a', 'b', 1, 'yy', '2023-10-17'), + (2, 2, 'k', 109.2, 'c','d',2, 'mm', '2023-10-18'), + (3, 3, 'o', 99.5, 'a', 'b', 1, 'yy', '2023-10-19'); + """ + + sql """drop table if exists ${lineitem_tb_name}""" + sql""" + CREATE TABLE IF NOT EXISTS ${lineitem_tb_name} ( + l_orderkey INT, + l_partkey INT, + l_suppkey INT, + l_linenumber INT, + l_quantity DECIMAL(15, 2), + l_extendedprice DECIMAL(15, 2), + l_discount DECIMAL(15, 2), + l_tax DECIMAL(15, 2), + l_returnflag VARCHAR(1), + l_linestatus VARCHAR(1), + l_commitdate date, + l_receiptdate date, + l_shipinstruct VARCHAR(10), + l_shipmode VARCHAR(10), + l_comment VARCHAR(44), + l_shipdate date + ) ENGINE=hive + PARTITION BY LIST (`l_shipdate`) () + PROPERTIES ( + "replication_num" = "1", + 'file_format'='orc' + ); + """ + + sql """ + insert into ${lineitem_tb_name} values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'), + (2, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-18'), + (3, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx', '2023-10-19'); + """ + + sql """switch internal""" + sql """create database if not exists ${db}""" + sql """use ${db}""" + + def mv_def_sql = """ + select l_shipdate, o_orderdate, l_partkey, + l_suppkey, sum(o_totalprice) as sum_total + from ${catalog_name}.${db}.${lineitem_tb_name} + left join ${catalog_name}.${db}.${orders_tb_name} on l_orderkey = o_orderkey and l_shipdate = o_orderdate + group by + l_shipdate, + o_orderdate, + l_partkey, + l_suppkey + """ + + def all_partition_sql = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey, sum(o_totalprice) as sum_total + from ${catalog_name}.${db}.${lineitem_tb_name} as t1 + left join ${catalog_name}.${db}.${orders_tb_name} as t2 on l_orderkey = o_orderkey and l_shipdate = o_orderdate + group by + l_shipdate, + o_orderdate, + l_partkey, + l_suppkey + """ + + def partition_sql = """ + select l_shipdate, o_orderdate, l_partkey, l_suppkey, sum(o_totalprice) as sum_total + from ${catalog_name}.${db}.${lineitem_tb_name} as t1 + left join ${catalog_name}.${db}.${orders_tb_name} as t2 on l_orderkey = o_orderkey and l_shipdate = o_orderdate + where (l_shipdate>= '2023-10-18' and l_shipdate <= '2023-10-19') + group by + l_shipdate, + o_orderdate, + l_partkey, + l_suppkey + """ + + sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" + sql """DROP TABLE IF EXISTS ${mv_name}""" + sql """ + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(l_shipdate) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + ${mv_def_sql} + """ + + def order_by_stmt = " order by 1,2,3,4,5" + waitingMTMVTaskFinished(getJobName(db, mv_name)) + + // All partition is valid, test query and rewrite by materialized view + mv_rewrite_success(all_partition_sql, mv_name) + compare_res(all_partition_sql + order_by_stmt) + mv_rewrite_success(partition_sql, mv_name) + compare_res(partition_sql + order_by_stmt) + + // Part partition is invalid, test can not use partition 2023-10-17 to rewrite + sql """ + insert into ${catalog_name}.${db}.${lineitem_tb_name} values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'); + """ + // wait partition is invalid + sleep(5000) + mv_rewrite_success(all_partition_sql, mv_name) + assertTrue(compare_res(all_partition_sql + order_by_stmt) == false) + mv_rewrite_success(partition_sql, mv_name) + assertTrue(compare_res(all_partition_sql + order_by_stmt) == false) + + + sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" + waitingMTMVTaskFinished(getJobName(db, mv_name)) + mv_rewrite_success(all_partition_sql, mv_name) + compare_res(all_partition_sql + order_by_stmt) + mv_rewrite_success(partition_sql, mv_name) + compare_res(partition_sql + order_by_stmt) + + + // Test when base table create partition + sql """ + insert into ${catalog_name}.${db}.${lineitem_tb_name} values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-21', '2023-10-21', 'a', 'b', 'yyyyyyyyy', '2023-10-21'); + """ + // Wait partition is invalid + sleep(5000) + mv_rewrite_success(all_partition_sql, mv_name) + assertTrue(compare_res(all_partition_sql + order_by_stmt) == false) + mv_rewrite_success(partition_sql, mv_name) + compare_res(partition_sql + order_by_stmt) + + // Test when base table delete partition test + sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" + waitingMTMVTaskFinished(getJobName(db, mv_name)) + mv_rewrite_success(all_partition_sql, mv_name) + compare_res(all_partition_sql + order_by_stmt) + mv_rewrite_success(partition_sql, mv_name) + compare_res(partition_sql + order_by_stmt) + + } +}