Skip to content

Commit

Permalink
[test](mtmv) Add inject statistic when mv rewrite regression test to …
Browse files Browse the repository at this point in the history
…make sure rewrite result stable (#43785)

### What problem does this PR solve?

Whether the CBO optimizer successfully rewrites a query to use a
materialized view depends on the statistics.
The optimizer consumes statistics in the following descending order of
priority:
1. the injected statistics
2. the statistics reported by BE
3. the statistics collected by actively analyzing the table.

When the pipeline runs a case, the statistics reported by BE may not
arrive in time. As a result, whether the CBO optimizer chooses the
rewritten plan is not deterministic, so the statistics are now injected
manually in the test cases.
  • Loading branch information
seawinde authored Nov 29, 2024
1 parent c6cb3b6 commit 9daa3b7
Show file tree
Hide file tree
Showing 201 changed files with 532 additions and 170 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -487,27 +487,37 @@ private static boolean isGroupByEqualsByFunctionDependency(
if (!viewShuttledExprQueryBasedSet.containsAll(queryGroupShuttledExpression)) {
return false;
}
Set<Expression> viewShouldUniformExpressionSet = new HashSet<>();
Set<Expression> viewShouldRemovedExpressionSet = new HashSet<>();
Set<Expression> viewScanShouldReservedExpressionSet = new HashSet<>();
// calc the group by expr which is needed to roll up and should be uniform
for (Map.Entry<Expression, Expression> expressionEntry :
for (Map.Entry<Expression, Expression> expressionMappingEntry :
viewShuttledExprQueryBasedToViewGroupByExprMap.entrySet()) {
if (queryGroupShuttledExpression.contains(expressionEntry.getKey())) {
// the group expr which query has, do not require uniform
continue;
if (queryGroupShuttledExpression.contains(expressionMappingEntry.getKey())) {
// the group expr which query has, do not require eliminate
viewScanShouldReservedExpressionSet.add(
viewShuttledExprToScanExprMapping.get(expressionMappingEntry.getValue()));
} else {
// the view expression which is more than query's expression, should try to eliminate
viewShouldRemovedExpressionSet.add(expressionMappingEntry.getValue());
}
viewShouldUniformExpressionSet.add(expressionEntry.getValue());
}

DataTrait dataTrait = tempRewrittenPlan.computeDataTrait();
for (Expression shouldUniformExpr : viewShouldUniformExpressionSet) {
Expression viewScanExpression = viewShuttledExprToScanExprMapping.get(shouldUniformExpr);
for (Expression viewShouldRemovedExpr : viewShouldRemovedExpressionSet) {
Expression viewScanExpression = viewShuttledExprToScanExprMapping.get(viewShouldRemovedExpr);
if (viewScanExpression == null) {
return false;
}
if (!(viewScanExpression instanceof Slot)) {
return false;
}
if (!dataTrait.isUniform((Slot) viewScanExpression)) {
if (!dataTrait.isUniform((Slot) viewScanExpression)
&& Sets.intersection(dataTrait.calEqualSet((Slot) viewScanExpression),
viewScanShouldReservedExpressionSet).isEmpty()) {
// Such as query is l_orderkey#0, l_linenumber#1, o_custkey#17, l_partkey#2
// view is ps_partkey#25, o_orderkey#16, l_orderkey#0, l_linenumber#1, o_custkey#17, l_partkey#2
// If want to check the group by equality, ps_partkey#25, o_orderkey#16 should be uniform
// or should be equal any of [ l_orderkey#0, l_linenumber#1, o_custkey#17, l_partkey#2]
return false;
}
}
Expand Down
7 changes: 5 additions & 2 deletions regression-test/data/mv_p0/ssb/q_1_1/q_1_1.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
19930101 1 1 1 1 1 1 1 1 1 1 100 1 1 1 2023-06-09 shipmode name address city nation AMERICA phone mktsegment name address city nation AMERICA phone name MFGR#1 category brand color type 4 container
19930101 1 1 1 1 1 1 1 1 1 1 100 1 1 1 2023-06-09 shipmode name address city nation AMERICA phone mktsegment name address city nation AMERICA phone name MFGR#1 category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container
19930101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2023-06-09 shipmode name address city nation region phone mktsegment name address city nation region phone name mfgr category brand color type 4 container

-- !select_mv --
4
16

-- !select --
4
16

Original file line number Diff line number Diff line change
Expand Up @@ -1983,9 +1983,8 @@ class Suite implements GroovyInterceptable {
check { result ->
boolean success = true;
for (String mv_name : mv_names) {
success = success && result.contains("${mv_name} chose")
Assert.assertEquals(true, result.contains("${mv_name} chose"))
}
Assert.assertEquals(true, success)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ suite ("agg_have_dup_base") {
qt_select_mv "select unix_timestamp(k1) tmp,sum(k2) from d_table group by tmp order by tmp;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='5');"""

mv_rewrite_success("select k1,sum(k2),max(k2) from d_table group by k1;", "k12s3m")

mv_rewrite_success("select k1,sum(k2) from d_table group by k1;", "k12s3m")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ suite ("diffrent_serialize") {
sql "insert into d_table select 2,2,2,'b';"
sql "insert into d_table select 3,3,null,'c';"

sql """alter table d_table modify column k4 set stats ('row_count'='7');"""

createMV("create materialized view mv1_1 as select k1,bitmap_intersect(to_bitmap(k2)) from d_table group by k1;")
createMV("create materialized view mv1 as select k1,bitmap_agg(k2) from d_table group by k1;")
createMV("create materialized view mv1_2 as select k1, multi_distinct_group_concat(k4) from d_table group by k1 order by k1;")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ suite ("test_agg_state_max_by") {
qt_select_star "select * from d_table order by 1,2;"
mv_rewrite_success("select k1,max_by(k2,k3) from d_table group by k1 order by 1,2;", "k1mb")
sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,max_by(k2,k3) from d_table group by k1 order by 1,2;", "k1mb")
qt_select_mv "select k1,max_by(k2,k3) from d_table group by k1 order by 1,2;"

Expand Down Expand Up @@ -101,6 +102,7 @@ suite ("test_agg_state_max_by") {
qt_select_star "select * from d_table order by 1,2;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
sql "analyze table d_table with sync;"
sql """set enable_stats=false;"""

Expand All @@ -114,6 +116,7 @@ suite ("test_agg_state_max_by") {
qt_select_mv "select k1,max_by(k2,abs(k3)) from d_table group by k1 order by 1,2;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,max_by(k2+k3,abs(k3)) from d_table group by k1 order by 1,2;", "k1mbcp1")
mv_rewrite_success("select k1,max_by(k2+k3,k3) from d_table group by k1 order by 1,2;", "k1mbcp2")
mv_rewrite_success("select k1,max_by(k2,abs(k3)) from d_table group by k1 order by 1,2;", "k1mbcp3")
Expand Down
3 changes: 3 additions & 0 deletions regression-test/suites/mv_p0/case_ignore/case_ignore.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ suite ("case_ignore") {
sql "insert into d_table select 2,2,2,'b';"
sql "insert into d_table select 3,-3,null,'c';"

sql """alter table d_table modify column k4 set stats ('row_count'='4');"""

createMV ("create materialized view k12a as select K1,abs(K2) from d_table;")

sql "insert into d_table select -4,-4,-4,'d';"
Expand All @@ -53,6 +55,7 @@ suite ("case_ignore") {
qt_select_mv "select K1,abs(K2) from d_table order by K1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,abs(k2) from d_table order by k1;", "k12a")
mv_rewrite_success("select K1,abs(K2) from d_table order by K1;", "k12a")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ suite("mv_contains_cast") {

order_qt_query_before "${query_sql}"

sql """alter table test modify column event_type set stats ('row_count'='10');"""

createMV("""
CREATE MATERIALIZED VIEW sync_mv
AS
Expand Down
2 changes: 2 additions & 0 deletions regression-test/suites/mv_p0/count_star/count_star.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ suite ("count_star") {
qt_select_mv "select count(*) from d_table where k3=1;"

sql """set enable_stats=true;"""

sql """alter table d_table modify column k4 set stats ('row_count'='8');"""
mv_rewrite_success("select k1,k4,count(*) from d_table group by k1,k4;", "kstar")
mv_rewrite_success("select k1,k4,count(*) from d_table where k1=1 group by k1,k4;", "kstar")
mv_rewrite_fail("select k1,k4,count(*) from d_table where k3=1 group by k1,k4;", "kstar")
Expand Down
2 changes: 2 additions & 0 deletions regression-test/suites/mv_p0/dis_26495/dis_26495.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ suite ("dis_26495") {

sql """insert into doris_test values (1,2,max_by_state(1,2));"""

sql """alter table doris_test modify column agg_st_1 set stats ('row_count'='1');"""

streamLoad {
table "doris_test"
set 'column_separator', ','
Expand Down
2 changes: 2 additions & 0 deletions regression-test/suites/mv_p0/k1ap2spa/k1ap2spa.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ suite ("k1ap2spa") {

sql """set enable_stats=true;"""

sql """alter table d_table modify column k1 set stats ('row_count'='5');"""

mv_rewrite_success("select abs(k1)+1 t,sum(abs(k2+1)) from d_table group by t order by t;", "k1ap2spa")

}
1 change: 1 addition & 0 deletions regression-test/suites/mv_p0/k1s2m3/k1s2m3.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ suite ("k1s2m3") {
sql "insert into d_table select 2,2,2,'b';"
sql "insert into d_table select 3,-3,null,'c';"

sql """alter table d_table modify column k1 set stats ('row_count'='6');"""
createMV("create materialized view k1s2m3 as select k1,sum(k2*k3) from d_table group by k1;")

sql "insert into d_table select -4,-4,-4,'d';"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ suite ("k1s2m3_auto_inc") {
qt_select_mv "select k3,sum(abs(k2+1)) from d_table group by k3 order by 1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='2');"""
mv_rewrite_success("select k3,sum(abs(k2+1)) from d_table group by k3 order by 1;", "k3ap2spa")
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ suite ("multi_agg_with_same_slot") {

sql """set enable_stats=true;"""

sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
mv_rewrite_success("select k1,k2,avg(k3),max(k3) from d_table group by k1,k2 order by 1,2;", "kmv")
mv_rewrite_success("select k1,k2,avg(k3)+max(k3) from d_table group by k1,k2 order by 1,2;", "kmv")
mv_rewrite_success("select k1,k2,avg(k3)+max(k3) from d_table group by grouping sets((k1),(k1,k2),()) order by 1,2;", "kmv")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,6 @@ suite ("multi_slot_k123p") {
qt_select_mv "select k1,version() from d_table order by k1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
mv_rewrite_success("select k1,k2+k3 from d_table order by k1;", "k123p")
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ suite ("multi_slot_k1a2p2ap3p") {
qt_select_mv "select abs(k1)+k2+1,abs(k2+2)+k3+3 from d_table order by abs(k1)+k2+1,abs(k2+2)+k3+3;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='7');"""
mv_rewrite_success("select abs(k1)+k2+1,abs(k2+2)+k3+3 from d_table order by abs(k1)+k2+1,abs(k2+2)+k3+3", "k1a2p2ap3p")

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ suite ("multi_slot_k1a2p2ap3ps") {
qt_select_base "select abs(k1)+k2+1,sum(abs(k2+2)+k3+3) from d_table group by abs(k1)+k2 order by abs(k1)+k2;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='4');"""
mv_rewrite_success("select abs(k1)+k2+1,sum(abs(k2+2)+k3+3) from d_table group by abs(k1)+k2+1 order by abs(k1)+k2+1", "k1a2p2ap3ps")

mv_rewrite_fail("select abs(k1)+k2+1,sum(abs(k2+2)+k3+3) from d_table group by abs(k1)+k2 order by abs(k1)+k2", "k1a2p2ap3ps")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,6 @@ suite ("multi_slot_k1p2ap3p") {
qt_select_mv "select k1+1,abs(k2+2)+k3+3 from d_table order by k1+1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='4');"""
mv_rewrite_success("select k1+1,abs(k2+2)+k3+3 from d_table order by k1+1;", "k1p2ap3p")
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,6 @@ suite ("multi_slot_k1p2ap3ps") {
qt_select_mv "select k1+1,sum(abs(k2+2)+k3+3) from d_table group by k1+1 order by k1+1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
mv_rewrite_success("select k1+1,sum(abs(k2+2)+k3+3) from d_table group by k1+1 order by k1+1;", "k1p2ap3ps")
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ suite ("multi_slot_multi_mv") {
qt_select_mv "select abs(k1)+k2+1,abs(k2+2)+k3+3 from d_table order by abs(k1)+k2+1,abs(k2+2)+k3+3;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='5');"""
for (def i = 0; i < retry_times; ++i) {
boolean is_k1a2p2ap3p = false
boolean is_k1a2p2ap3ps = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ suite ("mv_with_view") {
qt_select_mv "select * from v_k124 order by k1;"

sql """set enable_stats=true;"""
sql """alter table d_table modify column k1 set stats ('row_count'='3');"""
mv_rewrite_fail("select * from d_table order by k1;", "k312")

sql """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ suite ("null_insert") {
GROUP BY date,vid,os,ver,ip_country;"""

sql """set enable_stats=true;"""
sql """alter table test modify column date set stats ('row_count'='3');"""
mv_rewrite_success("""SELECT date, vid, os, ver, ip_country, hll_union(hll_hash(uid))
FROM test
GROUP BY date,vid,os,ver,ip_country;""", "mv_test")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ suite ("routine_load_hll") {
qt_select_mv "select time_stamp, hll_union_agg(device_id) from test group by time_stamp order by 1;"

sql """set enable_stats=true;"""
sql """alter table test modify column event_id set stats ('row_count'='2');"""
mv_rewrite_success("select time_stamp, hll_union_agg(device_id) from test group by time_stamp order by 1;", "m_view")
}
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ suite ("multiple_ssb") {
qt_select_count_3 "select LO_ORDERPRIORITY, count(1) from lineorder_flat where LO_ORDERPRIORITY in ('1','2','3') group by LO_ORDERPRIORITY order by 1,2;"

sql """set enable_stats=true;"""
sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='8');"""
mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
WHERE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ suite ("multiple_ssb_between") {

sql "analyze table lineorder_flat with sync;"
sql """set enable_stats=true;"""

sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='8');"""

mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
FROM lineorder_flat
Expand Down
Loading

0 comments on commit 9daa3b7

Please sign in to comment.