Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] Fix count(*) returning NULL rather than zero in MV rewrite (backport #49288) #49313

Merged
merged 1 commit into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,15 @@ private List<ScalarOperator> rewriteGroupKeys(List<ScalarOperator> groupKeys,

/**
* Rewrite aggregation by using MV.
* @param aggregates aggregation column ref -> scalar op to be rewritten
* @param equationRewriter equivalence class rewriter
* @param mapping output mapping for rewrite: column ref -> column ref
* @param queryColumnSet column set of query
* @param aggColRefToAggMap aggregate query column ref -> new scalar op which is used for rewrite mapping
* @param newProjection new projection mapping: col ref -> scalar op which is used for projection of new rewritten aggregate
* @param hasGroupByKeys whether query has group by keys or not
* @param context rewrite context
* @return mapping: column ref -> aggregate call operator of the rewritten aggregation
*/
private Map<ColumnRefOperator, CallOperator> rewriteAggregates(Map<ColumnRefOperator, ScalarOperator> aggregates,
EquationRewriter equationRewriter,
Expand Down Expand Up @@ -697,10 +706,16 @@ private Map<ColumnRefOperator, CallOperator> rewriteAggregates(Map<ColumnRefOper
ColumnRefOperator newAggColRef = context.getQueryRefFactory().create(
origColRef, newAggregate.getType(), newAggregate.isNullable());
newAggregations.put(newAggColRef, newAggregate);
newProjection.put(newAggColRef, genRollupProject(aggCall, newAggColRef, hasGroupByKeys));

// replace by using new column ref
aggColRefToAggMap.put(entry.getKey(), newAggColRef);
// No need to set `newProjections` here: the new projections are constructed from aggColRefToAggMap,
// and setting both would produce duplicate (or wrong) projections.
// eg:
// query: oldCol1 -> count()
// newAggregations: newCol1 -> sum(oldCol1)
// aggColRefToAggMap: oldCol1 -> coalesce(newCol1, 0)
// It will generate new projections as below:
// newProjections: oldCol1 -> coalesce(newCol1, 0)
ScalarOperator newProjectOp = genRollupProject(aggCall, newAggColRef, hasGroupByKeys);
aggColRefToAggMap.put(origColRef, newProjectOp);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5505,12 +5505,10 @@ public void testColumnPruningWithPredicates() {
" group by lo_orderdate" +
" having sum(lo_tax) > 100";
MVRewriteChecker checker = testRewriteOK(mv, query);
checker.contains("4:Project\n" +
checker.contains(" 4:Project\n" +
" | <slot 6> : 21: lo_orderdate\n" +
" | <slot 18> : 26: sum\n" +
" | <slot 19> : 27: sum\n" +
" | <slot 26> : clone(26: sum)\n" +
" | <slot 27> : clone(27: sum)\n" +
" | \n" +
" 3:AGGREGATE (merge finalize)");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,4 +222,23 @@ public void testViewDeltaRewriter() throws Exception {
connectContext.getSessionVariable(), TExplainLevel.NORMAL);
Assert.assertTrue(replayPair.second, replayPair.second.contains("mv_yyf_trade_water3"));
}

/**
 * Replays the {@code count_star_rewrite} query dump with query trace logging enabled and
 * verifies that the resulting plan references {@code tbl_mock_067} and that the output
 * expression is produced by a projection wrapping the count column in {@code coalesce(..., 0)}.
 */
@Test
public void testMV_CountStarRewrite() throws Exception {
    // Enable query trace logging on the session before the dump is replayed.
    QueryDebugOptions traceOptions = new QueryDebugOptions();
    traceOptions.setEnableQueryTraceLog(true);
    connectContext.getSessionVariable().setQueryDebugOptions(traceOptions.toString());

    Pair<QueryDumpInfo, String> replayed = getPlanFragment(
            getDumpInfoFromFile("query_dump/materialized-view/count_star_rewrite"),
            connectContext.getSessionVariable(),
            TExplainLevel.NORMAL);
    String plan = replayed.second;

    assertContains(plan, "tbl_mock_067");

    // NOTE: OUTPUT EXPRS must refer to the coalesce column ref (slot 59), not the raw count.
    String expectedFragment = " OUTPUT EXPRS:59: count\n"
            + " PARTITION: RANDOM\n"
            + "\n"
            + " RESULT SINK\n"
            + "\n"
            + " 3:Project\n"
            + " | <slot 59> : coalesce(80: count, 0)";
    assertContains(plan, expectedFragment);
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
-- name: test_mv_rewrite_with_count_star
create database db_${uuid0};
-- result:
-- !result
use db_${uuid0};
-- result:
-- !result
CREATE TABLE `t1` (
`k1` date not null,
`k2` datetime not null,
`k3` char(20),
`k4` varchar(20),
`k5` boolean,
`k6` tinyint,
`k7` smallint,
`k8` int,
`k9` bigint,
`k10` largeint,
`k11` float,
`k12` double,
`k13` decimal(27,9) )
DUPLICATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
PARTITION BY RANGE(`k1`)
(
PARTITION p20201022 VALUES [("2020-10-22"), ("2020-10-23")),
PARTITION p20201023 VALUES [("2020-10-23"), ("2020-10-24")),
PARTITION p20201024 VALUES [("2020-10-24"), ("2020-10-25"))
)
DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
) ;
-- result:
-- !result
INSERT INTO t1 VALUES ('2020-10-22','2020-10-23 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889);
-- result:
-- !result
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv1
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(k6), sum(k7), sum(k8), count(1) as cnt from t1 group by k1, k2;
-- result:
-- !result
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv2
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(cnt) as sum_cnt from test_mv1 group by k1, k2;
-- result:
-- !result
refresh materialized view test_mv1 with sync mode;
refresh materialized view test_mv2 with sync mode;
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2020-10-22';", "test_mv2")
-- result:
True
-- !result
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2024-10-22';", "test_mv2")
-- result:
False
-- !result
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2020-10-22';", "test_mv2")
-- result:
True
-- !result
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2024-10-22';", "test_mv2")
-- result:
True
-- !result
select count(*) from t1 where k1 = '2020-10-22';
-- result:
1
-- !result
select count(*) from t1 where k1 = '2024-10-24';
-- result:
0
-- !result
select count(*) from t1 where k2 = '2020-10-22';
-- result:
0
-- !result
select count(*) from t1 where k2 = '2024-10-24';
-- result:
0
-- !result
drop materialized view test_mv1;
-- result:
-- !result
drop materialized view test_mv2;
-- result:
-- !result
drop table t1;
-- result:
-- !result
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
-- name: test_mv_rewrite_with_count_star

-- Work inside a dedicated database so the case does not collide with other cases.
create database db_${uuid0};
use db_${uuid0};
-- Base table: range-partitioned by k1 with three daily partitions (2020-10-22 .. 2020-10-24).
CREATE TABLE `t1` (
`k1` date not null,
`k2` datetime not null,
`k3` char(20),
`k4` varchar(20),
`k5` boolean,
`k6` tinyint,
`k7` smallint,
`k8` int,
`k9` bigint,
`k10` largeint,
`k11` float,
`k12` double,
`k13` decimal(27,9) )
DUPLICATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
PARTITION BY RANGE(`k1`)
(
PARTITION p20201022 VALUES [("2020-10-22"), ("2020-10-23")),
PARTITION p20201023 VALUES [("2020-10-23"), ("2020-10-24")),
PARTITION p20201024 VALUES [("2020-10-24"), ("2020-10-25"))
)
DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
) ;

-- A single row: k1 = '2020-10-22', k2 = '2020-10-23 12:12:12'.
INSERT INTO t1 VALUES ('2020-10-22','2020-10-23 12:12:12','k3','k4',0,1,2,3,4,5,1.1,1.12,2.889);
-- First-level MV: per (k1, k2) sums plus a row count (cnt) over t1.
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv1
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(k6), sum(k7), sum(k8), count(1) as cnt from t1 group by k1, k2;

-- Nested MV built on test_mv1: rolls cnt up again as sum(cnt) per (k1, k2).
CREATE MATERIALIZED VIEW IF NOT EXISTS test_mv2
PARTITION BY `k1`
DISTRIBUTED BY HASH(`k1`)
REFRESH DEFERRED ASYNC
as
select k1, k2, sum(cnt) as sum_cnt from test_mv1 group by k1, k2;

-- Both MVs are created with REFRESH DEFERRED, so refresh them explicitly before querying.
refresh materialized view test_mv1 with sync mode;
refresh materialized view test_mv2 with sync mode;
-- Check whether count(*) queries with different predicates are rewritten to use test_mv2.
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2020-10-22';", "test_mv2")
function: print_hit_materialized_view("select count(*) from t1 where k1 = '2024-10-22';", "test_mv2")
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2020-10-22';", "test_mv2")
function: print_hit_materialized_view("select count(*) from t1 where k2 = '2024-10-22';", "test_mv2")
-- Run count(*) with predicates that match the inserted row (first query) and that match no rows
-- (the remaining queries: the only row has k2 = '2020-10-23 12:12:12').
-- NOTE(review): the hit checks above use '2024-10-22' while the queries below use '2024-10-24';
-- neither value matches the inserted row -- confirm the difference is intentional.
select count(*) from t1 where k1 = '2020-10-22';
select count(*) from t1 where k1 = '2024-10-24';
select count(*) from t1 where k2 = '2020-10-22';
select count(*) from t1 where k2 = '2024-10-24';

-- Clean up the objects created by this case.
drop materialized view test_mv1;
drop materialized view test_mv2;
drop table t1;
Loading