Skip to content

Commit

Permalink
opt: fix greedy lookup join constraints
Browse files Browse the repository at this point in the history
Previously, the optimizer planned lookup joins that constrained as many
index columns as possible. This is not optimal when a suffix of the index
columns is constrained to multiple values or a range of constant values by
optional filters. These suffixes increase the number of lookup spans without
making the constraint more selective. In the case where suffix columns
are constrained to multiple values, a lookup span is generated for each
value, significantly increasing the number of lookups performed for each
input row.

This commit makes the building of lookup join constraints less greedy.
A suffix of the index columns will now only be constrained to multiple
values or a range of constant values if those constraints originate from
the query filter rather than from optional filters.

Fixes cockroachdb#75596

Release note (performance improvement): The optimizer now plans more
efficient lookup joins in some cases.
  • Loading branch information
mgartner committed Oct 3, 2024
1 parent 072d3e0 commit bc493bf
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 40 deletions.
45 changes: 38 additions & 7 deletions pkg/sql/opt/lookupjoin/constraint_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,20 @@ func (b *ConstraintBuilder) Build(
colsAlloc := make(opt.ColList, numIndexKeyCols*2)
keyCols := colsAlloc[0:0:numIndexKeyCols]
rightSideCols := colsAlloc[numIndexKeyCols : numIndexKeyCols : numIndexKeyCols*2]
var inputProjections memo.ProjectionsExpr
var lookupExpr memo.FiltersExpr
var allLookupFilters memo.FiltersExpr
var filterOrdsToExclude intsets.Fast
foundLookupCols := false
var remainingFilters memo.FiltersExpr
var (
inputProjections memo.ProjectionsExpr
lookupExpr memo.FiltersExpr
allLookupFilters memo.FiltersExpr
remainingFilters memo.FiltersExpr
filterOrdsToExclude intsets.Fast
)
// We do not want a suffix of the index columns to be constrained to
// multiple values by optional filters. This would only increase the number
// of lookup spans without making the constraint more selective. We keep
// track of the suffix length of indexed columns constrained in this way so
// that we can remove them after the loop.
optionalMultiValFilterSuffixLen := 0

// addEqualityColumns adds the given columns as an equality in keyCols and
// rightSideCols.
Expand All @@ -249,6 +257,7 @@ func (b *ConstraintBuilder) Build(
filterOrdsToExclude.Add(eqFilterOrds[eqIdx])
foundEqualityCols = true
foundLookupCols = true
optionalMultiValFilterSuffixLen = 0
continue
}

Expand Down Expand Up @@ -278,6 +287,7 @@ func (b *ConstraintBuilder) Build(
derivedEquivCols.Add(idxCol)
foundEqualityCols = true
foundLookupCols = true
optionalMultiValFilterSuffixLen = 0
continue
}

Expand All @@ -302,6 +312,7 @@ func (b *ConstraintBuilder) Build(
allLookupFilters = append(allLookupFilters, b.allFilters[allIdx])
addEqualityColumns(constColID, idxCol)
filterOrdsToExclude.Add(allIdx)
optionalMultiValFilterSuffixLen = 0
continue
}

Expand All @@ -319,7 +330,15 @@ func (b *ConstraintBuilder) Build(
}
lookupExpr = append(lookupExpr, valsFilter)
allLookupFilters = append(allLookupFilters, b.allFilters[allIdx])
filterOrdsToExclude.Add(allIdx)
if isOptional := allIdx >= len(onFilters); isOptional {
optionalMultiValFilterSuffixLen++
} else {
// There's no need to track optional filters for reducing the
// remaining filters because they are not present in the ON
// filters to begin with.
filterOrdsToExclude.Add(allIdx)
}

continue
}

Expand All @@ -333,12 +352,14 @@ func (b *ConstraintBuilder) Build(
allLookupFilters = append(allLookupFilters, b.allFilters[startIdx])
filterOrdsToExclude.Add(startIdx)
foundLookupCols = true
optionalMultiValFilterSuffixLen = 0
}
if foundEnd {
lookupExpr = append(lookupExpr, b.allFilters[endIdx])
allLookupFilters = append(allLookupFilters, b.allFilters[endIdx])
filterOrdsToExclude.Add(endIdx)
foundLookupCols = true
optionalMultiValFilterSuffixLen = 0
}
if foundStart && foundEnd {
// The column is constrained above and below by an inequality; no further
Expand All @@ -351,8 +372,11 @@ func (b *ConstraintBuilder) Build(
// case that only the start or end bound could be constrained with
// an input column; in this case, it still may be possible to use a constant
// to form the other bound.
//
// We exclude optional filters from this search because an optional
// range filter will not make the lookup more selective.
rangeFilter, remaining, filterIdx := b.findJoinConstantRangeFilter(
b.allFilters, idxCol, idxColIsDesc, !foundStart, !foundEnd,
onFilters, idxCol, idxColIsDesc, !foundStart, !foundEnd,
)
if rangeFilter != nil {
// A constant range filter could be found.
Expand All @@ -377,6 +401,13 @@ func (b *ConstraintBuilder) Build(
return Constraint{}, false
}

// Remove the suffix of index columns constrained to multiple values by
// optional filters.
if lookupExpr != nil && optionalMultiValFilterSuffixLen > 0 {
lookupExpr = lookupExpr[:len(lookupExpr)-optionalMultiValFilterSuffixLen]
allLookupFilters = allLookupFilters[:len(allLookupFilters)-optionalMultiValFilterSuffixLen]
}

// If a lookup expression is required, convert the equality columns to
// equalities in the lookup expression.
if len(lookupExpr) > 0 {
Expand Down
49 changes: 49 additions & 0 deletions pkg/sql/opt/lookupjoin/testdata/computed
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,55 @@ input projections:
v_eq = a + 10
lookup_join_const_col_@7 = 0

lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) virtual, y INT) index=(v, y, x)
x = a
optional: y IN (10, 20)
----
input projections:
v_eq = a + 10
lookup expression:
((y IN (10, 20)) AND (v_eq = v)) AND (a = x)

lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) virtual, y INT, z INT) index=(v, y, x, z)
x = a AND z > 0
optional: y IN (10, 20)
----
input projections:
v_eq = a + 10
lookup expression:
(((y IN (10, 20)) AND (z > 0)) AND (v_eq = v)) AND (a = x)

lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) virtual, y INT, w INT) index=(v, y, x)
x = a AND w > 0
optional: y IN (10, 20)
----
input projections:
v_eq = a + 10
lookup expression:
((y IN (10, 20)) AND (v_eq = v)) AND (a = x)
remaining filters:
w > 0

lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) virtual, z INT) index=(v, x, z)
x = a
optional: z IN (10, 20)
----
key cols:
v = v_eq
x = a
input projections:
v_eq = a + 10

lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) virtual, z INT) index=(v, x, z)
x = a
optional: z > 0
----
key cols:
v = v_eq
x = a
input projections:
v_eq = a + 10

# TODO(mgartner): We should be able to generate a lookup join by determining
# that v is not null because the filter demands that x is not null, and v is
# calculated from x.
Expand Down
53 changes: 53 additions & 0 deletions pkg/sql/opt/lookupjoin/testdata/key_cols
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,56 @@ key cols:
z = a
input projections:
lookup_join_const_col_@5 = 0

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y, z)
x = a AND y = b
optional: z IN (3, 4)
----
key cols:
x = a
y = b

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y, z)
x = 1 AND y = b
optional: z IN (3, 4)
----
key cols:
x = lookup_join_const_col_@6
y = b
input projections:
lookup_join_const_col_@6 = 1

lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z)
x = a
optional: z IN (3, 4) AND y IN (10, 20)
----
key cols:
x = a

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y, z)
x = a AND y = b
optional: z > 10
----
key cols:
x = a
y = b

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y, z)
x = 1 AND y = b
optional: z > 10
----
key cols:
x = lookup_join_const_col_@6
y = b
input projections:
lookup_join_const_col_@6 = 1

lookup-constraints left=(a int, b int) right=(x int, y int, z int, zz int) index=(x, y, z, zz)
x = 1 AND y = b
optional: z > 10 AND zz > 0
----
key cols:
x = lookup_join_const_col_@6
y = b
input projections:
lookup_join_const_col_@6 = 1
79 changes: 52 additions & 27 deletions pkg/sql/opt/lookupjoin/testdata/lookup_expr
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,33 @@ lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y)
x = a
optional: y IN (1, 2, 3)
----
lookup expression:
(y IN (1, 2, 3)) AND (a = x)
key cols:
x = a

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y)
x = a AND z = 1
optional: y IN (1, 2, 3)
----
lookup expression:
(y IN (1, 2, 3)) AND (a = x)
key cols:
x = a
remaining filters:
z = 1

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y)
x = a
optional: y IN (1, 2, 3) AND z = 1
----
key cols:
x = a

lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y, z)
x = a
optional: y IN (1, 2, 3) AND z = 1
----
input projections:
lookup_join_const_col_@8 = 1
lookup expression:
(y IN (1, 2, 3)) AND (a = x)
((y IN (1, 2, 3)) AND (a = x)) AND (lookup_join_const_col_@8 = z)

lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z)
x = 1 AND z = c
Expand All @@ -118,20 +127,7 @@ y = b
optional: x IN (1, 2) AND z IN (3, 4)
----
lookup expression:
((x IN (1, 2)) AND (z IN (3, 4))) AND (b = y)

# TODO(#75596): The lookup expression should not contain (z IN (3, 4)) because
# it is an optional filter from a CHECK constraint. It will only increase the
# number of lookup spans generated without increasing the selectivity of the
# lookup.
lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z)
x = 1 AND y = b
optional: z IN (3, 4)
----
input projections:
lookup_join_const_col_@7 = 1
lookup expression:
((z IN (3, 4)) AND (lookup_join_const_col_@7 = x)) AND (b = y)
(x IN (1, 2)) AND (b = y)

# The most restrictive IN filter should be chosen.
lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y)
Expand Down Expand Up @@ -188,8 +184,8 @@ lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y)
x = a
optional: y > 0
----
lookup expression:
(y > 0) AND (a = x)
key cols:
x = a

lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z)
x = a AND y = b AND z > 0
Expand Down Expand Up @@ -217,26 +213,27 @@ lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=
x = 1 AND y = b
optional: z > 0
----
key cols:
x = lookup_join_const_col_@7
y = b
input projections:
lookup_join_const_col_@7 = 1
lookup expression:
((z > 0) AND (lookup_join_const_col_@7 = x)) AND (b = y)

lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y)
x = a AND z = 1
optional: y > 0
----
lookup expression:
(y > 0) AND (a = x)
key cols:
x = a
remaining filters:
z = 1

lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y)
x = a
optional: y > 0 AND z = 1
----
lookup expression:
(y > 0) AND (a = x)
key cols:
x = a


# Test for range filters and IN filters.
Expand Down Expand Up @@ -281,6 +278,20 @@ x IN (10, 20, 30, 40) AND y = b AND x > 10
lookup expression:
(x IN (20, 30, 40)) AND (b = y)

lookup-constraints left=(a int) right=(x int, y int, z int) index=(x, y, z)
x IN (1, 2) AND y = a
optional: z > 10
----
lookup expression:
(x IN (1, 2)) AND (a = y)

lookup-constraints left=(a int, b int) right=(x int, y int, z int, zz int) index=(x, y, z, zz)
x IN (1, 2) AND y = b
optional: z IN (10, 20, 30) AND zz > 0
----
lookup expression:
(x IN (1, 2)) AND (b = y)

# Test for range filters on input columns.

lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y)
Expand Down Expand Up @@ -464,3 +475,17 @@ x <= a
----
lookup expression:
x <= a

lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y)
y > b
optional: x IN (10, 20, 30)
----
lookup expression:
(x IN (10, 20, 30)) AND (y > b)

lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z)
x = a AND z > b
optional: y IN (10, 20, 30)
----
lookup expression:
((y IN (10, 20, 30)) AND (z > b)) AND (a = x)
8 changes: 2 additions & 6 deletions pkg/sql/opt/xform/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -3081,7 +3081,7 @@ left-join (lookup lookup_expr [as=t])
│ └── filters (true)
└── filters (true)

# The filters in the lookup expression should constrain only the first 3 columns
# The filters in the lookup expression should constrain only the first 2 columns
# of idx_u_etc.
opt expect=GenerateLookupJoins
SELECT * FROM (VALUES (1, 10), (2, 20), (3, NULL)) AS q(w, u) LEFT JOIN lookup_expr t
Expand All @@ -3098,7 +3098,6 @@ left-join (lookup lookup_expr [as=t])
│ ├── lookup expression
│ │ └── filters
│ │ ├── r:3 IN ('east', 'west') [outer=(3), constraints=(/3: [/'east' - /'east'] [/'west' - /'west']; tight)]
│ │ ├── y:9 IN (10, 20) [outer=(9), constraints=(/9: [/10 - /10] [/20 - /20]; tight)]
│ │ └── column2:2 = u:5 [outer=(2,5), constraints=(/2: (/NULL - ]; /5: (/NULL - ]), fd=(2)==(5), (5)==(2)]
│ ├── cardinality: [3 - ]
│ ├── fd: (3,4)-->(5-7,9,10)
Expand Down Expand Up @@ -3946,10 +3945,7 @@ inner-join (lookup abcd_check)
├── fd: (1)==(6), (6)==(1)
├── inner-join (lookup abcd_check@abcd_check_a_b_c_idx)
│ ├── columns: m:1!null n:2 a:6!null b:7!null c:8 abcd_check.rowid:10!null
│ ├── lookup expression
│ │ └── filters
│ │ ├── b:7 IN (10, 20) [outer=(7), constraints=(/7: [/10 - /10] [/20 - /20]; tight)]
│ │ └── m:1 = a:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
│ ├── key columns: [1] = [6]
│ ├── fd: (10)-->(6-8), (1)==(6), (6)==(1)
│ ├── scan small
│ │ └── columns: m:1 n:2
Expand Down

0 comments on commit bc493bf

Please sign in to comment.