Skip to content

Commit

Permalink
[BugFix] Fix star count return null rather than zero bug for mv rewrite (#49288)
Browse files Browse the repository at this point in the history

Signed-off-by: shuming.li <[email protected]>
(cherry picked from commit b5a9d75)

# Conflicts:
#	fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/materialization/AggregatedMaterializedViewRewriter.java
#	fe/fe-core/src/test/java/com/starrocks/sql/plan/ReplayWithMVFromDumpTest.java
  • Loading branch information
LiShuMing authored and mergify[bot] committed Aug 2, 2024
1 parent 00fcaf4 commit 712806a
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,15 @@ private List<ScalarOperator> rewriteGroupKeys(List<ScalarOperator> groupKeys,

/**
* Rewrite aggregation by using MV.
* @param aggregates aggregation column ref -> scalar op to be rewritten
* @param equationRewriter equivalence class rewriter
* @param mapping output mapping for rewrite: column ref -> column ref
* @param queryColumnSet column set of query
* @param aggColRefToAggMap aggregate query column ref -> new scalar op which is used for rewrite mapping
* @param newProjection new projection mapping: col ref -> scalar op which is used for projection of new rewritten aggregate
* @param hasGroupByKeys whether query has group by keys or not
* @param context rewrite context
* @return
*/
private Map<ColumnRefOperator, CallOperator> rewriteAggregates(Map<ColumnRefOperator, ScalarOperator> aggregates,
EquationRewriter equationRewriter,
Expand Down Expand Up @@ -698,8 +707,24 @@ private Map<ColumnRefOperator, CallOperator> rewriteAggregates(Map<ColumnRefOper
// replace original projection
aggregateMapping.put(entry.getKey(), copyProject);
} else {
<<<<<<< HEAD
newAggregations.put(oldColRef, newAggregate);
newProjection.put(oldColRef, genRollupProject(aggCall, oldColRef, hasGroupByKeys));
=======
ColumnRefOperator newAggColRef = context.getQueryRefFactory().create(
origColRef, newAggregate.getType(), newAggregate.isNullable());
newAggregations.put(newAggColRef, newAggregate);
// No needs to set `newProjections` since it will use aggColRefToAggMap to construct new projections,
// otherwise it will cause duplicate projections(or wrong projections).
// eg:
// query: oldCol1 -> count()
// newAggregations: newCol1 -> sum(oldCol1)
// aggColRefToAggMap: oldCol1 -> coalesce(newCol1, 0)
// It will generate new projections as below:
// newProjections: oldCol1 -> coalesce(newCol1, 0)
ScalarOperator newProjectOp = genRollupProject(aggCall, newAggColRef, hasGroupByKeys);
aggColRefToAggMap.put(origColRef, newProjectOp);
>>>>>>> b5a9d75885 ([BugFix] Fix star count return null rather zero bug for mv rewrite (#49288))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5512,12 +5512,10 @@ public void testColumnPruningWithPredicates() {
" group by lo_orderdate" +
" having sum(lo_tax) > 100";
MVRewriteChecker checker = testRewriteOK(mv, query);
checker.contains("4:Project\n" +
checker.contains(" 4:Project\n" +
" | <slot 6> : 21: lo_orderdate\n" +
" | <slot 18> : 26: sum\n" +
" | <slot 19> : 27: sum\n" +
" | <slot 26> : clone(26: sum)\n" +
" | <slot 27> : clone(27: sum)\n" +
" | \n" +
" 3:AGGREGATE (merge finalize)");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,4 +242,38 @@ public void testSyncMVRewriteWithDict() throws Exception {
// "nmock_040, nmock_041 from tbl_mock_001 order by nmock_002;";
Assert.assertFalse(replayPair.second, replayPair.second.contains("mv_tbl_mock_001"));
}
<<<<<<< HEAD
}
=======

@Test
public void testViewDeltaRewriter() throws Exception {
    // Turn on query trace logging so MV-rewrite decisions appear in the replayed plan.
    QueryDebugOptions opts = new QueryDebugOptions();
    opts.setEnableQueryTraceLog(true);
    connectContext.getSessionVariable().setQueryDebugOptions(opts.toString());

    // Replay the dumped query and check that the view-delta rewrite chose the expected MV.
    Pair<QueryDumpInfo, String> result = getPlanFragment(
            getDumpInfoFromFile("query_dump/view_delta"),
            connectContext.getSessionVariable(), TExplainLevel.NORMAL);
    Assert.assertTrue(result.second, result.second.contains("mv_yyf_trade_water3"));
}

// Regression test for #49288: a count(*) query rewritten against a nested MV must
// project coalesce(<mv count col>, 0) so empty groups yield 0 instead of NULL.
// The replayed dump drives the rewrite; assertions check the final explain output.
@Test
public void testMV_CountStarRewrite() throws Exception {
    // Enable query trace logging so the rewrite shows up in the replayed plan.
    QueryDebugOptions debugOptions = new QueryDebugOptions();
    debugOptions.setEnableQueryTraceLog(true);
    connectContext.getSessionVariable().setQueryDebugOptions(debugOptions.toString());
    Pair<QueryDumpInfo, String> replayPair =
            getPlanFragment(getDumpInfoFromFile("query_dump/materialized-view/count_star_rewrite"),
                    connectContext.getSessionVariable(), TExplainLevel.NORMAL);
    // The mocked base table must appear in the plan (rewrite produced a valid scan).
    assertContains(replayPair.second, "tbl_mock_067");
    // NOTE: OUTPUT EXPRS must refer to coalesce column ref
    assertContains(replayPair.second, " OUTPUT EXPRS:59: count\n" +
            " PARTITION: RANDOM\n" +
            "\n" +
            " RESULT SINK\n" +
            "\n" +
            " 3:Project\n" +
            " | <slot 59> : coalesce(80: count, 0)");
}
}
>>>>>>> b5a9d75885 ([BugFix] Fix star count return null rather zero bug for mv rewrite (#49288))

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
-- name: test_mv_rewrite_with_count_star
-- Regression fixture for #49288: count(*) rewritten through nested MVs
-- (test_mv1 -> test_mv2) must return 0 via coalesce instead of NULL when
-- the predicate matches no rows. This is the recorded-result (R) file.
-- NOTE(review): assumes the sqltester ignores plain `--` comment lines and
-- matches `-- result:` blocks positionally — confirm before adding more.
create database db_${uuid0};
-- result:
-- !result
use db_${uuid0};
-- result:
-- !result
CREATE TABLE `t1` (
`k1` date not null,
`k2` datetime not null,
`k3` char(20),
`k4` varchar(20),
`k5` boolean,
`k6` tinyint,
`k7` smallint,
`k8` int,
`k9` bigint,
`k10` largeint,
`k11` float,
`k12` double,
`k13` decimal(27,9) )
DUPLICATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
PARTITION BY RANGE(`k1`)
(
PARTITION p20201022 VALUES [("2020-10-22"), ("2020-10-23")),
PARTITION p20201023 VALUES [("2020-10-23"), ("2020-10-24")),
PARTITION p20201024 VALUES [("2020-10-24"), ("2020-10-25"))
)
DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
) ;
-- result:
-- !result
INSERT INTO t1 VALUES ('2020-10-22','2020-10-23 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889);
-- result:
-- !result
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv1
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(k6), sum(k7), sum(k8), count(1) as cnt from t1 group by k1, k2;
-- result:
-- !result
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv2
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(cnt) as sum_cnt from test_mv1 group by k1, k2;
-- result:
-- !result
refresh materialized view test_mv1 with sync mode;
refresh materialized view test_mv2 with sync mode;
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2020-10-22';", "test_mv2")
-- result:
True
-- !result
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2024-10-22';", "test_mv2")
-- result:
False
-- !result
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2020-10-22';", "test_mv2")
-- result:
True
-- !result
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2024-10-22';", "test_mv2")
-- result:
True
-- !result
select count(*) from t1 where k1 = '2020-10-22';
-- result:
1
-- !result
select count(*) from t1 where k1 = '2024-10-24';
-- result:
0
-- !result
select count(*) from t1 where k2 = '2020-10-22';
-- result:
0
-- !result
select count(*) from t1 where k2 = '2024-10-24';
-- result:
0
-- !result
drop materialized view test_mv1;
-- result:
-- !result
drop materialized view test_mv2;
-- result:
-- !result
drop table t1;
-- result:
-- !result
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
-- name: test_mv_rewrite_with_count_star
-- Regression fixture for #49288: count(*) rewritten through nested MVs
-- (test_mv1 -> test_mv2) must return 0 (via coalesce) instead of NULL when
-- the predicate matches no rows. This is the raw (T) test file; expected
-- results live in the companion R file.

create database db_${uuid0};
use db_${uuid0};
CREATE TABLE `t1` (
`k1` date not null,
`k2` datetime not null,
`k3` char(20),
`k4` varchar(20),
`k5` boolean,
`k6` tinyint,
`k7` smallint,
`k8` int,
`k9` bigint,
`k10` largeint,
`k11` float,
`k12` double,
`k13` decimal(27,9) )
DUPLICATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
PARTITION BY RANGE(`k1`)
(
PARTITION p20201022 VALUES [("2020-10-22"), ("2020-10-23")),
PARTITION p20201023 VALUES [("2020-10-23"), ("2020-10-24")),
PARTITION p20201024 VALUES [("2020-10-24"), ("2020-10-25"))
)
DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
) ;

INSERT INTO t1 VALUES ('2020-10-22','2020-10-23 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889);
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv1
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(k6), sum(k7), sum(k8), count(1) as cnt from t1 group by k1, k2;

CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv2
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(cnt) as sum_cnt from test_mv1 group by k1, k2;

refresh materialized view test_mv1 with sync mode;
refresh materialized view test_mv2 with sync mode;
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2020-10-22';", "test_mv2")
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2024-10-22';", "test_mv2")
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2020-10-22';", "test_mv2")
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2024-10-22';", "test_mv2")
select count(*) from t1 where k1 = '2020-10-22';
select count(*) from t1 where k1 = '2024-10-24';
select count(*) from t1 where k2 = '2020-10-22';
select count(*) from t1 where k2 = '2024-10-24';

drop materialized view test_mv1;
drop materialized view test_mv2;
drop table t1;

0 comments on commit 712806a

Please sign in to comment.