-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor order-by fd check #14
base: functional-dependency
Are you sure you want to change the base?
Conversation
Signed-off-by: AilinKid <[email protected]>
Signed-off-by: ailinkid <[email protected]>
Signed-off-by: ailinkid <[email protected]>
planner/core/logical_plan_builder.go
Outdated
|
||
// check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 | ||
//if hasDistinct && projExprs != nil { | ||
// err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this undo what pingcap#21286 has done about distinct and order by, leaving it be be checked by new FD logic, passed~
this also revealed ORM's only full group check for |
Signed-off-by: ailinkid <[email protected]>
Signed-off-by: ailinkid <[email protected]>
Signed-off-by: ailinkid <[email protected]>
Signed-off-by: arenatlx <[email protected]>
Signed-off-by: arenatlx <[email protected]>
Signed-off-by: arenatlx <[email protected]>
Signed-off-by: arenatlx <[email protected]>
@@ -3762,6 +4044,11 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L | |||
originalFields = sel.Fields.Fields | |||
} | |||
|
|||
// basic order-by checking, nothing to do with only-full-group-by mode | |||
if err := b.checkOrderByPart1(ctx, sel, p); err != nil { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or we can change the method name to basicOrderByCheck
to avoid the part1
.
// Since here doesn't require FD to check, this can take place as soon as possible. | ||
// Step1: | ||
isNonAggregatedQuery := false | ||
resolver4AggDetect := colResolverForOnlyFullGroupBy{ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, why do we need the colResolverForOnlyFullGroupBy
?
This one should be removed once we fully remove the old only_full_group_by check.
planner/core/logical_plan_builder.go
Outdated
continue | ||
} | ||
// GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely. | ||
baseColsUniqueIDs := fd.NewFastIntSet() | ||
for _, bc := range baseCols { | ||
baseColsUniqueIDs.Insert(int(bc.UniqueID)) | ||
} | ||
if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) { | ||
continue | ||
} | ||
if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) { | ||
continue | ||
} | ||
errShowCol := baseCols[0] | ||
for _, col := range baseCols { | ||
colSet := fd.NewFastIntSet(int(col.UniqueID)) | ||
if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { | ||
errShowCol = col | ||
break | ||
} | ||
} | ||
// better use the schema alias name firstly if any. | ||
name := "" | ||
for idx, schemaCol := range proj.Schema().Columns { | ||
if schemaCol.UniqueID == errShowCol.UniqueID { | ||
name = proj.names[idx].String() | ||
break | ||
} | ||
} | ||
if name == "" { | ||
name = errShowCol.OrigName | ||
} | ||
return nil, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name) | ||
} | ||
} | ||
if sel.Distinct { | ||
// order-by with distinct case | ||
// Rule #1: order by item should be in the select filed list | ||
// Rule #2: the base col that order by item dependent on should be in the select field list | ||
for offset, odrItem := range orderByItemExprs { | ||
item := fd.NewFastIntSet() | ||
switch x := odrItem.Expr.(type) { | ||
case *expression.Column: | ||
item.Insert(int(x.UniqueID)) | ||
case *expression.ScalarFunction: | ||
// order by item may not be projected as a column in projection, allocated a new one | ||
scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
if !ok { | ||
// panic("selected expr must have been registered, shouldn't be here") | ||
scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) | ||
} | ||
item.Insert(scalarUniqueID) | ||
default: | ||
} | ||
// Rule #1 | ||
if item.SubsetOf(selectExprsUniqueIDs) { | ||
continue | ||
} | ||
// Rule #2 | ||
baseCols := expression.ExtractColumns(odrItem.Expr) | ||
if len(baseCols) == 0 { | ||
// order by item has no reference of base col, like order by abs(1) and rand() | ||
continue | ||
} | ||
colSet := fd.NewFastIntSet() | ||
for _, col := range baseCols { | ||
colSet.Insert(int(col.UniqueID)) | ||
} | ||
if colSet.SubsetOf(selectExprsUniqueIDs) { | ||
continue | ||
} | ||
// find that error base col | ||
errShowCol := baseCols[0] | ||
for _, col := range baseCols { | ||
colSet := fd.NewFastIntSet() | ||
colSet.Insert(int(col.UniqueID)) | ||
if !colSet.SubsetOf(selectExprsUniqueIDs) { | ||
errShowCol = col | ||
break | ||
} | ||
} | ||
// better use the schema alias name firstly if any. | ||
name := "" | ||
for idx, schemaCol := range proj.Schema().Columns { | ||
if schemaCol.UniqueID == errShowCol.UniqueID { | ||
name = proj.names[idx].String() | ||
break | ||
} | ||
} | ||
if name == "" { | ||
name = errShowCol.OrigName | ||
} | ||
if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok { | ||
return nil, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT") | ||
} | ||
// select distinct count(a) from t group by b order by sum(a); ✗ | ||
return nil, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT") | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems that now we can just add one more condition, becoming !b.isCreateView && b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy()
.
Then we can merge the buildSort
and buildSortWithCheck
.
Even further, we can split this if block to a single method.
case *expression.ScalarFunction: | ||
scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
if !ok { | ||
panic("selected expr must have been registered, shouldn't be here") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Throw log msg instead.
panic("selected expr must have been registered, shouldn't be here") | ||
} | ||
selectExprsUniqueIDs.Insert(scalarUniqueID) | ||
default: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it's empty, we can just remove it.
// order by item may not be projected as a column in projection, allocated a new one | ||
scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
if !ok { | ||
// panic("selected expr must have been registered, shouldn't be here") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remember to remove it.
item.Insert(scalarUniqueID) | ||
case *expression.Constant: | ||
// order by null/false/true can always be ok, let item be empty. | ||
default: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove it if it's empty.
// Step#3: whether order by item is in the FD closure of group-by & select list items. | ||
// Since FD hasn't been built yet for now, let's delay this logic to checkOrderByPart2. | ||
} else { | ||
// check only-full-group-by 4 order-by clause without group-by clause |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The following check isn't used for only_full_group_by
, just for basic name resolving checks?
Signed-off-by: AilinKid <[email protected]>
2d18cc8
to
2e1c49f
Compare
Signed-off-by: AilinKid <[email protected]>
conditionNC *FastIntSet | ||
} | ||
|
||
// ncEdge is quite simple for remarking the null value relationship between cols, storing it as fdEdge will add complexity of traverse of a closure. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
L47-61 无用
@@ -29,14 +29,48 @@ type fdEdge struct { | |||
// The value of the strict and eq bool forms the four kind of edges: | |||
// functional dependency, lax functional dependency, strict equivalence constraint, lax equivalence constraint. | |||
// And if there's a functional dependency `const` -> `column` exists. We would let the from side be empty. | |||
// Adjustment: when strict is true and equiv is false, it means the edge is a Lax equivalence; when both true, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
L32-37 无用
// STRICT FD: | ||
// A --> C is stronger than AB --> C. --- YES | ||
// A --> BC is stronger than A --> C. --- YES | ||
// | ||
// LAX FD: | ||
// 1: A ~~> C is stronger than AB ~~> C. --- YES | ||
// 2: A ~~> BC is stronger than A ~~> C. --- NO | ||
// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
L334-L345 无用
// --------------------------------- | ||
// 1 2 1 2 1 1 1 | ||
// 2 2 null null null null null | ||
// 3 3 3 3 null null null |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
L134 should be
3 3 null null null null null
Co-authored-by: Yiding Cui <[email protected]>
Signed-off-by: AilinKid [email protected]
What problem does this PR solve?
order by checking can be divided as two part.
1: one is with no order by clause, which should collect info to identify whether this query is a aggregated query at all. Then judge whether a agg func in order by clause is suitable. (this can be done much earlier)
2: one is with order by clause, which require FD to check whether the item in order by clause is suitable. (this only can be done after projection is built and order item has been rewritten as an expression)
we need more test to cover, do not merge before that
What is changed and how it works?
Check List
Tests
Code changes
Side effects
Related changes
tidb-ansible
repository