Skip to content

Commit

Permalink
opt: improve exists subquery hoisting
Browse files Browse the repository at this point in the history
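This commit makes a small improvement to the subquery-hoisting rules so
that hoisting an `EXISTS` subquery can often avoid projecting a new
column to check for NULL values: when the subquery already produces a
not-null column, that column is reused as the "canary" column, and a
constant `true` column is only projected when no such column exists.
This can allow other optimization rules to match later on.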

Epic: None

Release note: None
  • Loading branch information
DrewKimball committed Oct 2, 2024
1 parent 5b364a5 commit 5bc3c1e
Show file tree
Hide file tree
Showing 10 changed files with 973 additions and 1,178 deletions.
16 changes: 8 additions & 8 deletions pkg/sql/opt/memo/testdata/stats/with
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,13 @@ with &1 (t0)
├── stats: [rows=10000]
├── fd: ()-->(30)
├── inner-join (cross)
│ ├── columns: true_agg:28(bool!null)
│ ├── columns: canary_agg:28(bool!null)
│ ├── stats: [rows=10000]
│ ├── fd: ()-->(28)
│ ├── scan a
│ │ └── stats: [rows=5000]
│ ├── inner-join (cross)
│ │ ├── columns: true_agg:28(bool!null)
│ │ ├── columns: canary_agg:28(bool!null)
│ │ ├── cardinality: [0 - 2]
│ │ ├── multiplicity: left-rows(zero-or-one), right-rows(one-or-more)
│ │ ├── stats: [rows=2]
Expand All @@ -132,29 +132,29 @@ with &1 (t0)
│ │ │ ├── cardinality: [1 - 2]
│ │ │ └── stats: [rows=2]
│ │ ├── select
│ │ │ ├── columns: true_agg:28(bool!null)
│ │ │ ├── columns: canary_agg:28(bool!null)
│ │ │ ├── cardinality: [0 - 1]
│ │ │ ├── stats: [rows=1, distinct(28)=1, null(28)=0]
│ │ │ ├── key: ()
│ │ │ ├── fd: ()-->(28)
│ │ │ ├── scalar-group-by
│ │ │ │ ├── columns: true_agg:28(bool)
│ │ │ │ ├── columns: canary_agg:28(bool)
│ │ │ │ ├── cardinality: [1 - 1]
│ │ │ │ ├── stats: [rows=1, distinct(28)=1, null(28)=0]
│ │ │ │ ├── key: ()
│ │ │ │ ├── fd: ()-->(28)
│ │ │ │ ├── values
│ │ │ │ │ ├── columns: true:27(bool!null)
│ │ │ │ │ ├── columns: canary:27(bool!null)
│ │ │ │ │ ├── cardinality: [1 - 1]
│ │ │ │ │ ├── stats: [rows=1]
│ │ │ │ │ ├── key: ()
│ │ │ │ │ ├── fd: ()-->(27)
│ │ │ │ │ └── (true,) [type=tuple{bool}]
│ │ │ │ └── aggregations
│ │ │ │ └── const-agg [as=true_agg:28, type=bool, outer=(27)]
│ │ │ │ └── true:27 [type=bool]
│ │ │ │ └── const-agg [as=canary_agg:28, type=bool, outer=(27)]
│ │ │ │ └── canary:27 [type=bool]
│ │ │ └── filters
│ │ │ └── true_agg:28 IS NOT NULL [type=bool, outer=(28), constraints=(/28: (/NULL - ]; tight)]
│ │ │ └── canary_agg:28 IS NOT NULL [type=bool, outer=(28), constraints=(/28: (/NULL - ]; tight)]
│ │ └── filters (true)
│ └── filters (true)
└── projections
Expand Down
17 changes: 7 additions & 10 deletions pkg/sql/opt/memo/testdata/typing
Original file line number Diff line number Diff line change
Expand Up @@ -419,28 +419,25 @@ SELECT EXISTS(SELECT * FROM a WHERE expr<0) FROM (SELECT x+1 AS expr FROM a)
project
├── columns: exists:11(bool!null)
├── group-by (hash)
│ ├── columns: x:1(int!null) true_agg:13(bool)
│ ├── columns: x:1(int!null) canary_agg:12(int)
│ ├── grouping columns: x:1(int!null)
│ ├── left-join (cross)
│ │ ├── columns: x:1(int!null) expr:5(int!null) true:12(bool)
│ │ ├── columns: x:1(int!null) expr:5(int!null) x:6(int)
│ │ ├── project
│ │ │ ├── columns: expr:5(int!null) x:1(int!null)
│ │ │ ├── scan a
│ │ │ │ └── columns: x:1(int!null)
│ │ │ └── projections
│ │ │ └── x:1 + 1 [as=expr:5, type=int]
│ │ ├── project
│ │ │ ├── columns: true:12(bool!null)
│ │ │ ├── scan a
│ │ │ └── projections
│ │ │ └── true [as=true:12, type=bool]
│ │ ├── scan a
│ │ │ └── columns: x:6(int!null)
│ │ └── filters
│ │ └── expr:5 < 0 [type=bool]
│ └── aggregations
│ └── const-not-null-agg [as=true_agg:13, type=bool]
│ └── true:12 [type=bool]
│ └── const-not-null-agg [as=canary_agg:12, type=int]
│ └── x:6 [type=int]
└── projections
└── true_agg:13 IS NOT NULL [as=exists:11, type=bool]
└── canary_agg:12 IS NOT NULL [as=exists:11, type=bool]

# Cast
build
Expand Down
31 changes: 23 additions & 8 deletions pkg/sql/opt/norm/decorrelate_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1124,19 +1124,34 @@ func (r *subqueryHoister) constructConditionalExpr(scalar opt.ScalarExpr) opt.Sc
// CONST_AGG which will need to be changed to a CONST_NOT_NULL_AGG (which is
// defined to ignore those nulls so that its result will be unaffected).
func (r *subqueryHoister) constructGroupByExists(subquery memo.RelExpr) memo.RelExpr {
trueColID := r.f.Metadata().AddColumn("true", types.Bool)
aggColID := r.f.Metadata().AddColumn("true_agg", types.Bool)
var canaryColTyp *types.T
var canaryColID opt.ColumnID
var subqueryWithCanary memo.RelExpr
if subquery.Relational().NotNullCols.Empty() {
canaryColTyp = types.Bool
canaryColID = r.f.Metadata().AddColumn("canary", types.Bool)
subqueryWithCanary = r.f.ConstructProject(
subquery,
memo.ProjectionsExpr{r.f.ConstructProjectionsItem(memo.TrueSingleton, canaryColID)},
opt.ColSet{},
)
} else {
canaryColID, _ = subquery.Relational().NotNullCols.Next(0)
canaryColTyp = r.mem.Metadata().ColumnMeta(canaryColID).Type
subqueryWithCanary = r.f.ConstructProject(
subquery,
memo.ProjectionsExpr{},
opt.MakeColSet(canaryColID),
)
}
aggColID := r.f.Metadata().AddColumn("canary_agg", canaryColTyp)
existsColID := r.f.Metadata().AddColumn("exists", types.Bool)

return r.f.ConstructProject(
r.f.ConstructScalarGroupBy(
r.f.ConstructProject(
subquery,
memo.ProjectionsExpr{r.f.ConstructProjectionsItem(memo.TrueSingleton, trueColID)},
opt.ColSet{},
),
subqueryWithCanary,
memo.AggregationsExpr{r.f.ConstructAggregationsItem(
r.f.ConstructConstAgg(r.f.ConstructVariable(trueColID)),
r.f.ConstructConstAgg(r.f.ConstructVariable(canaryColID)),
aggColID,
)},
memo.EmptyGroupingPrivate,
Expand Down
Loading

0 comments on commit 5bc3c1e

Please sign in to comment.