From 2baed3e4a48a3cf0bd16db16dd698d6bab1d1d77 Mon Sep 17 00:00:00 2001 From: AilinKid <314806019@qq.com> Date: Fri, 18 Mar 2022 00:02:30 +0800 Subject: [PATCH 01/13] fix order by fd check Signed-off-by: AilinKid <314806019@qq.com> --- planner/core/logical_plan_builder.go | 190 +++++++++++++++++++++++++-- planner/funcdep/extract_fd_test.go | 3 - 2 files changed, 179 insertions(+), 14 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index e413fa308f6d4..4ff30dc918e58 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1235,8 +1235,9 @@ func findColFromNaturalUsingJoin(p LogicalPlan, col *expression.Column) (name *t } // buildProjection returns a Projection plan and non-aux columns length. -func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields []*ast.SelectField, mapper map[*ast.AggregateFuncExpr]int, - windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []expression.Expression, int, error) { +func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, mapper map[*ast.AggregateFuncExpr]int, + windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) { + fields := sel.Fields.Fields err := b.preprocessUserVarTypes(ctx, p, fields, mapper) if err != nil { return nil, nil, 0, err @@ -1321,6 +1322,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields } proj.SetChildren(p) // delay the only-full-group-by-check in create view statement to later query. + var orderByItemExprs []*util.ByItems if !b.isCreateView { fds := proj.ExtractFD() // Projection -> Children -> ... @@ -1421,8 +1423,85 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields // for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag. fds.HasAggBuilt = false } + if sel.OrderBy != nil && sel.GroupBy != nil { + // check only-full-group-by 4 order-by clause with group-by clause + orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) + if err != nil { + return nil, nil, 0, err + } + selectExprsUniqueIDs := fd.NewFastIntSet() + for _, expr := range proj.Exprs[:oldLen] { + switch x := expr.(type) { + case *expression.Column: + selectExprsUniqueIDs.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + panic("selected expr must have been registered, shouldn't be here") + } + selectExprsUniqueIDs.Insert(scalarUniqueID) + default: + } + } + for offset, odrItem := range orderByItemExprs { + item := fd.NewFastIntSet() + switch x := odrItem.Expr.(type) { + case *expression.Column: + item.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + panic("selected expr must have been registered, shouldn't be here") + } + item.Insert(scalarUniqueID) + default: + } + // Step#1: whether order by item is in the origin select field list + if item.SubsetOf(selectExprsUniqueIDs) { + continue + } + // Step#2: whether order by item is in the group by list + if item.SubsetOf(fds.GroupByCols) { + continue + } + // Step#3: whether order by item is in the FD closure of group-by & select list items. + if item.SubsetOf(fds.ConstantCols()) { + continue + } + strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols) + if item.SubsetOf(strictClosureOfGroupByCols) { + continue + } + strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs) + if item.SubsetOf(strictClosureOfSelectCols) { + continue + } + // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. + baseCols := expression.ExtractColumns(odrItem.Expr) + errShowCol := baseCols[0] + for _, col := range baseCols { + colSet := fd.NewFastIntSet(int(col.UniqueID)) + if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { + errShowCol = col + break + } + } + // better use the schema alias name firstly if any. + name := "" + for idx, schemaCol := range proj.Schema().Columns { + if schemaCol.UniqueID == errShowCol.UniqueID { + name = proj.names[idx].String() + break + } + } + if name == "" { + name = errShowCol.OrigName + } + return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name) + } + } } - return proj, proj.Exprs, oldLen, nil + return proj, orderByItemExprs, oldLen, nil } func (b *PlanBuilder) buildDistinct(child LogicalPlan, length int) (*LogicalAggregation, error) { @@ -1821,6 +1900,32 @@ func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*a return b.buildSortWithCheck(ctx, p, byItems, aggMapper, windowMapper, nil, 0, false) } +func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, + windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) ([]*util.ByItems, error) { + exprs := make([]*util.ByItems, 0, len(byItems)) + transformer := &itemTransformer{} + for i, item := range byItems { + newExpr, _ := item.Expr.Accept(transformer) + item.Expr = newExpr.(ast.ExprNode) + it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil) + if err != nil { + return nil, err + } + + // check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 + if hasDistinct && projExprs != nil { + err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) + if err != nil { + return nil, err + } + } + + p = np + exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc}) + } + return exprs, nil +} + func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) (*LogicalSort, error) { if _, isUnion := p.(*LogicalUnionAll); isUnion { @@ -1855,6 +1960,18 @@ func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byI return sort, nil } +func (b *PlanBuilder) buildSortWithCheckV2(p LogicalPlan, byItems []*util.ByItems) (*LogicalSort, error) { + if _, isUnion := p.(*LogicalUnionAll); isUnion { + b.curClause = globalOrderByClause + } else { + b.curClause = orderByClause + } + sort := LogicalSort{}.Init(b.ctx, b.getSelectOffset()) + sort.ByItems = byItems + sort.SetChildren(p) + return sort, nil +} + // checkOrderByInDistinct checks whether ORDER BY has conflicts with DISTINCT, see #12442 func (b *PlanBuilder) checkOrderByInDistinct(byItem *ast.ByItem, idx int, expr expression.Expression, p LogicalPlan, originalExprs []expression.Expression, length int) error { // Check if expressions in ORDER BY whole match some fields in DISTINCT. @@ -3152,6 +3269,51 @@ func (b *PlanBuilder) checkOnlyFullGroupByWithGroupClause(p LogicalPlan, sel *as return nil } +func (b *PlanBuilder) checkOrderByPart1(sel *ast.SelectStmt) error { + if sel.GroupBy != nil { + // check only-full-group-by 4 order-by clause with group-by clause + // Step#1: whether order by item is in the select field list + // Step#2: whether order by item is in the group by list + // Step#3: whether order by item is in the FD closure of group-by & select list items. + // Since FD hasn't been built yet for now, let's delay this logic to checkOrderByPart2. + } else { + // check only-full-group-by 4 order-by clause without group-by clause + // Step1: judge whether this query is an aggregated query. + // Step2: there is a agg in order by while sql is not an aggregated query, errors. + // Since here doesn't require FD to check, this can take place as soon as possible. + // Step1: + isNonAggregatedQuery := false + resolver := colResolverForOnlyFullGroupBy{ + firstOrderByAggColIdx: -1, + } + resolver.curClause = fieldList + for idx, field := range sel.Fields.Fields { + resolver.exprIdx = idx + field.Accept(&resolver) + } + if sel.Having != nil { + sel.Having.Expr.Accept(&resolver) + } + if !resolver.hasAggFunc { + isNonAggregatedQuery = true + } + + // Step2: + if sel.OrderBy != nil { + resolver.curClause = orderByClause + for idx, byItem := range sel.OrderBy.Items { + resolver.exprIdx = idx + byItem.Expr.Accept(&resolver) + } + } + if isNonAggregatedQuery && resolver.firstOrderByAggColIdx != -1 { + // SQL like `select a from t where a = 1 order by count(b)` is illegal. + return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(resolver.firstOrderByAggColIdx + 1) + } + } + return nil +} + func (b *PlanBuilder) checkOnlyFullGroupByWithOutGroupClause(p LogicalPlan, sel *ast.SelectStmt) error { resolver := colResolverForOnlyFullGroupBy{ firstOrderByAggColIdx: -1, @@ -3177,7 +3339,7 @@ func (b *PlanBuilder) checkOnlyFullGroupByWithOutGroupClause(p LogicalPlan, sel // SQL like `select a from t where a = 1 order by count(b)` is illegal. return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(resolver.firstOrderByAggColIdx + 1) } - if !resolver.hasAggFuncOrAnyValue || len(resolver.nonAggCols) == 0 { + if !(resolver.hasAggFunc || resolver.hasAnyValue) || len(resolver.nonAggCols) == 0 { return nil } singleValueColNames := make(map[*types.FieldName]struct{}, len(sel.Fields.Fields)) @@ -3223,7 +3385,8 @@ type colResolverForOnlyFullGroupBy struct { nonAggCols []*ast.ColumnName exprIdx int nonAggColIdxs []int - hasAggFuncOrAnyValue bool + hasAggFunc bool + hasAnyValue bool firstOrderByAggColIdx int curClause clauseCode } @@ -3231,7 +3394,7 @@ type colResolverForOnlyFullGroupBy struct { func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) { switch t := node.(type) { case *ast.AggregateFuncExpr: - c.hasAggFuncOrAnyValue = true + c.hasAggFunc = true if c.curClause == orderByClause { c.firstOrderByAggColIdx = c.exprIdx } @@ -3239,7 +3402,7 @@ func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) { case *ast.FuncCallExpr: // enable function `any_value` in aggregation even `ONLY_FULL_GROUP_BY` is set if t.FnName.L == ast.AnyValue { - c.hasAggFuncOrAnyValue = true + c.hasAnyValue = true return node, true } case *ast.ColumnNameExpr: @@ -3721,7 +3884,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L windowAggMap map[*ast.AggregateFuncExpr]int correlatedAggMap map[*ast.AggregateFuncExpr]int gbyCols []expression.Expression - projExprs []expression.Expression + orderByItems []*util.ByItems ) // set for update read to true before building result set node @@ -3762,6 +3925,11 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L originalFields = sel.Fields.Fields } + // basic order-by checking, nothing to do with only-full-group-by mode + if err := b.checkOrderByPart1(sel); err != nil { + return nil, err + } + if sel.GroupBy != nil { p, gbyCols, err = b.resolveGbyExprs(ctx, p, sel.GroupBy, sel.Fields.Fields) if err != nil { @@ -3865,7 +4033,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L var oldLen int // According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html, // we can only process window functions after having clause, so `considerWindow` is false now. - p, projExprs, oldLen, err = b.buildProjection(ctx, p, sel.Fields.Fields, totalMap, nil, false, sel.OrderBy != nil) + p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, false, sel.OrderBy != nil) if err != nil { return nil, err } @@ -3903,7 +4071,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L // In such case plan `p` is not changed, so we don't have to build another projection. if hasWindowFuncField { // Now we build the window function fields. - p, projExprs, oldLen, err = b.buildProjection(ctx, p, sel.Fields.Fields, windowAggMap, windowMapper, true, false) + p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, true, false) if err != nil { return nil, err } @@ -3919,7 +4087,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L if sel.OrderBy != nil { if b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy() { - p, err = b.buildSortWithCheck(ctx, p, sel.OrderBy.Items, orderMap, windowMapper, projExprs, oldLen, sel.Distinct) + p, err = b.buildSortWithCheckV2(p, orderByItems) } else { p, err = b.buildSort(ctx, p, sel.OrderBy.Items, orderMap, windowMapper) } diff --git a/planner/funcdep/extract_fd_test.go b/planner/funcdep/extract_fd_test.go index 84a1533e161eb..99e4dd593698a 100644 --- a/planner/funcdep/extract_fd_test.go +++ b/planner/funcdep/extract_fd_test.go @@ -349,9 +349,6 @@ func TestFDSet_MakeOuterJoin(t *testing.T) { ctx := context.TODO() is := testGetIS(ass, tk.Session()) for i, tt := range tests { - if i == 0 { - fmt.Println(1) - } comment := fmt.Sprintf("case:%v sql:%s", i, tt.sql) stmt, err := par.ParseOneStmt(tt.sql, "", "") ass.Nil(err, comment) From b7fc20da4fd852bf1767193d73a941e94ed19cbf Mon Sep 17 00:00:00 2001 From: ailinkid <314806019@qq.com> Date: Fri, 18 Mar 2022 16:11:48 +0800 Subject: [PATCH 02/13] make test pass Signed-off-by: ailinkid <314806019@qq.com> --- planner/core/logical_plan_builder.go | 232 ++++++++++++++------- planner/funcdep/only_full_group_by_test.go | 6 +- 2 files changed, 164 insertions(+), 74 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 4ff30dc918e58..71bf0ff705d7f 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1423,12 +1423,10 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a // for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag. fds.HasAggBuilt = false } - if sel.OrderBy != nil && sel.GroupBy != nil { - // check only-full-group-by 4 order-by clause with group-by clause - orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) - if err != nil { - return nil, nil, 0, err - } + if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1") { + fmt.Println(1) + } + if sel.OrderBy != nil { selectExprsUniqueIDs := fd.NewFastIntSet() for _, expr := range proj.Exprs[:oldLen] { switch x := expr.(type) { @@ -1443,61 +1441,126 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a default: } } - for offset, odrItem := range orderByItemExprs { - item := fd.NewFastIntSet() - switch x := odrItem.Expr.(type) { - case *expression.Column: - item.Insert(int(x.UniqueID)) - case *expression.ScalarFunction: - scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) - if !ok { - panic("selected expr must have been registered, shouldn't be here") + // check only-full-group-by 4 order-by clause with group-by clause + orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) + if err != nil { + return nil, nil, 0, err + } + if sel.GroupBy != nil { + for offset, odrItem := range orderByItemExprs { + item := fd.NewFastIntSet() + switch x := odrItem.Expr.(type) { + case *expression.Column: + item.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + panic("selected expr must have been registered, shouldn't be here") + } + item.Insert(scalarUniqueID) + default: } - item.Insert(scalarUniqueID) - default: - } - // Step#1: whether order by item is in the origin select field list - if item.SubsetOf(selectExprsUniqueIDs) { - continue - } - // Step#2: whether order by item is in the group by list - if item.SubsetOf(fds.GroupByCols) { - continue - } - // Step#3: whether order by item is in the FD closure of group-by & select list items. - if item.SubsetOf(fds.ConstantCols()) { - continue - } - strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols) - if item.SubsetOf(strictClosureOfGroupByCols) { - continue - } - strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs) - if item.SubsetOf(strictClosureOfSelectCols) { - continue - } - // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. - baseCols := expression.ExtractColumns(odrItem.Expr) - errShowCol := baseCols[0] - for _, col := range baseCols { - colSet := fd.NewFastIntSet(int(col.UniqueID)) - if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { - errShowCol = col - break + // Step#1: whether order by item is in the origin select field list + if item.SubsetOf(selectExprsUniqueIDs) { + continue } - } - // better use the schema alias name firstly if any. - name := "" - for idx, schemaCol := range proj.Schema().Columns { - if schemaCol.UniqueID == errShowCol.UniqueID { - name = proj.names[idx].String() - break + // Step#2: whether order by item is in the group by list + if item.SubsetOf(fds.GroupByCols) { + continue + } + // Step#3: whether order by item is in the FD closure of group-by & select list items. + if item.SubsetOf(fds.ConstantCols()) { + continue + } + strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols) + if item.SubsetOf(strictClosureOfGroupByCols) { + continue + } + strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs) + if item.SubsetOf(strictClosureOfSelectCols) { + continue + } + // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. + baseCols := expression.ExtractColumns(odrItem.Expr) + errShowCol := baseCols[0] + for _, col := range baseCols { + colSet := fd.NewFastIntSet(int(col.UniqueID)) + if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { + errShowCol = col + break + } + } + // better use the schema alias name firstly if any. + name := "" + for idx, schemaCol := range proj.Schema().Columns { + if schemaCol.UniqueID == errShowCol.UniqueID { + name = proj.names[idx].String() + break + } } + if name == "" { + name = errShowCol.OrigName + } + return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name) } - if name == "" { - name = errShowCol.OrigName + } + if sel.Distinct { + // order-by with distinct case + // Rule #1: order by item should be in the select filed list + // Rule #2: the base col that order by item dependent on should be in the select field list + for offset, odrItem := range orderByItemExprs { + item := fd.NewFastIntSet() + switch x := odrItem.Expr.(type) { + case *expression.Column: + item.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + panic("selected expr must have been registered, shouldn't be here") + } + item.Insert(scalarUniqueID) + default: + } + // Rule #1 + if item.SubsetOf(selectExprsUniqueIDs) { + continue + } + // Rule #2 + baseCols := expression.ExtractColumns(odrItem.Expr) + colSet := fd.NewFastIntSet() + for _, col := range baseCols { + colSet.Insert(int(col.UniqueID)) + } + if colSet.SubsetOf(selectExprsUniqueIDs) { + continue + } + // find that error base col + errShowCol := baseCols[0] + for _, col := range baseCols { + colSet := fd.NewFastIntSet() + colSet.Insert(int(col.UniqueID)) + if !colSet.SubsetOf(selectExprsUniqueIDs) { + errShowCol = col + break + } + } + // better use the schema alias name firstly if any. + name := "" + for idx, schemaCol := range proj.Schema().Columns { + if schemaCol.UniqueID == errShowCol.UniqueID { + name = proj.names[idx].String() + break + } + } + if name == "" { + name = errShowCol.OrigName + } + if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok { + return nil, nil, 0, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT") + } + // select distinct count(a) from t group by b order by sum(a); ✗ + return nil, nil, 0, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT") } - return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name) } } } @@ -1904,7 +1967,7 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) ([]*util.ByItems, error) { exprs := make([]*util.ByItems, 0, len(byItems)) transformer := &itemTransformer{} - for i, item := range byItems { + for _, item := range byItems { newExpr, _ := item.Expr.Accept(transformer) item.Expr = newExpr.(ast.ExprNode) it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil) @@ -1913,12 +1976,12 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt } // check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 - if hasDistinct && projExprs != nil { - err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) - if err != nil { - return nil, err - } - } + //if hasDistinct && projExprs != nil { + // err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) + // if err != nil { + // return nil, err + // } + //} p = np exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc}) @@ -2140,6 +2203,10 @@ func matchField(f *ast.SelectField, col *ast.ColumnNameExpr, ignoreAsName bool) return false } +func resolveFromOuterSchema(v *ast.ColumnNameExpr) bool { + return false +} + func resolveFromSelectFields(v *ast.ColumnNameExpr, fields []*ast.SelectField, ignoreAsName bool) (index int, err error) { var matchedExpr ast.ExprNode index = -1 @@ -3269,7 +3336,7 @@ func (b *PlanBuilder) checkOnlyFullGroupByWithGroupClause(p LogicalPlan, sel *as return nil } -func (b *PlanBuilder) checkOrderByPart1(sel *ast.SelectStmt) error { +func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt, p LogicalPlan) error { if sel.GroupBy != nil { // check only-full-group-by 4 order-by clause with group-by clause // Step#1: whether order by item is in the select field list @@ -3299,16 +3366,38 @@ func (b *PlanBuilder) checkOrderByPart1(sel *ast.SelectStmt) error { } // Step2: - if sel.OrderBy != nil { + if sel.OrderBy != nil && isNonAggregatedQuery { + // YES: SQL like `select a from t where a = 1 order by count(b)` is illegal. + // NO: SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b)) is ok. + // judge whether the aggregated func in order by item is referred to outer schema. resolver.curClause = orderByClause + resolver.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items)) for idx, byItem := range sel.OrderBy.Items { resolver.exprIdx = idx byItem.Expr.Accept(&resolver) } - } - if isNonAggregatedQuery && resolver.firstOrderByAggColIdx != -1 { - // SQL like `select a from t where a = 1 order by count(b)` is illegal. - return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(resolver.firstOrderByAggColIdx + 1) + // has no order-by agg. + if len(resolver.orderByAggMap) == 0 { + return nil + } + // has some order-by agg. + for k, v := range resolver.orderByAggMap { + var cols []*expression.Column + for _, arg := range k.Args { + newArg, np, err := b.rewrite(ctx, arg, p, nil, true) + if err != nil { + return err + } + p = np + cols = append(cols, expression.ExtractColumns(newArg)...) + } + // use outer logical query block correlated cols --- valid + if len(cols) == 0 { + continue + } + // use logical query block cols --- invalid + return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1) + } } } return nil @@ -3389,6 +3478,7 @@ type colResolverForOnlyFullGroupBy struct { hasAnyValue bool firstOrderByAggColIdx int curClause clauseCode + orderByAggMap map[*ast.AggregateFuncExpr]int } func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) { @@ -3396,7 +3486,7 @@ func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) { case *ast.AggregateFuncExpr: c.hasAggFunc = true if c.curClause == orderByClause { - c.firstOrderByAggColIdx = c.exprIdx + c.orderByAggMap[t] = c.exprIdx } return node, true case *ast.FuncCallExpr: @@ -3926,7 +4016,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L } // basic order-by checking, nothing to do with only-full-group-by mode - if err := b.checkOrderByPart1(sel); err != nil { + if err := b.checkOrderByPart1(ctx, sel, p); err != nil { return nil, err } diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go index cbb4c9eb6a50f..b519436beabff 100644 --- a/planner/funcdep/only_full_group_by_test.go +++ b/planner/funcdep/only_full_group_by_test.go @@ -72,13 +72,13 @@ func TestOnlyFullGroupByOldCases(t *testing.T) { require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t1.c' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") _, err = tk.Exec("SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1;") require.NotNil(t, err) - require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT") + require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t1.d' which is not in SELECT list; this is incompatible with DISTINCT") _, err = tk.Exec("SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1;") require.NotNil(t, err) - require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT") + require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t1.d' which is not in SELECT list; this is incompatible with DISTINCT") _, err = tk.Exec("SELECT (SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1) FROM t as t2;") require.NotNil(t, err) - require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT") + require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t1.d' which is not in SELECT list; this is incompatible with DISTINCT") // test case 7 tk.MustExec("drop table if exists t") From 2e1c49f0b13b3c9eee84faae8b5e0a4dc2eea1f0 Mon Sep 17 00:00:00 2001 From: ailinkid <314806019@qq.com> Date: Sat, 19 Mar 2022 22:55:55 +0800 Subject: [PATCH 03/13] fix orm django agg test case Signed-off-by: ailinkid <314806019@qq.com> --- planner/core/logical_plan_builder.go | 40 +++++++++++++++++----- planner/core/logical_plans.go | 51 ++++++++++++++++++++++++---- planner/core/plan.go | 7 +++- planner/funcdep/fd_graph.go | 25 ++++++++------ 4 files changed, 96 insertions(+), 27 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 71bf0ff705d7f..c56547df032c8 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1236,7 +1236,7 @@ func findColFromNaturalUsingJoin(p LogicalPlan, col *expression.Column) (name *t // buildProjection returns a Projection plan and non-aux columns length. func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, mapper map[*ast.AggregateFuncExpr]int, - windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) { + windowMapper map[*ast.WindowFuncExpr]int, orderMapper map[*ast.AggregateFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) { fields := sel.Fields.Fields err := b.preprocessUserVarTypes(ctx, p, fields, mapper) if err != nil { @@ -1423,9 +1423,6 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a // for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag. fds.HasAggBuilt = false } - if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1") { - fmt.Println(1) - } if sel.OrderBy != nil { selectExprsUniqueIDs := fd.NewFastIntSet() for _, expr := range proj.Exprs[:oldLen] { @@ -1442,7 +1439,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } } // check only-full-group-by 4 order-by clause with group-by clause - orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) + orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) if err != nil { return nil, nil, 0, err } @@ -1453,11 +1450,15 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a case *expression.Column: item.Insert(int(x.UniqueID)) case *expression.ScalarFunction: + // order by item may not be projected as a column in projection, allocated a new one scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) if !ok { - panic("selected expr must have been registered, shouldn't be here") + // panic("selected expr must have been registered, shouldn't be here") + scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) } item.Insert(scalarUniqueID) + case *expression.Constant: + // order by null/false/true can always be ok, let item be empty. default: } // Step#1: whether order by item is in the origin select field list @@ -1482,6 +1483,21 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. baseCols := expression.ExtractColumns(odrItem.Expr) + if len(baseCols) == 0 { + // order by item has no reference of base col, like order by abs(1) and rand() + continue + } + // GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely. + baseColsUniqueIDs := fd.NewFastIntSet() + for _, bc := range baseCols { + baseColsUniqueIDs.Insert(int(bc.UniqueID)) + } + if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) { + continue + } + if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) { + continue + } errShowCol := baseCols[0] for _, col := range baseCols { colSet := fd.NewFastIntSet(int(col.UniqueID)) @@ -1514,9 +1530,11 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a case *expression.Column: item.Insert(int(x.UniqueID)) case *expression.ScalarFunction: + // order by item may not be projected as a column in projection, allocated a new one scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) if !ok { - panic("selected expr must have been registered, shouldn't be here") + // panic("selected expr must have been registered, shouldn't be here") + scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) } item.Insert(scalarUniqueID) default: @@ -1527,6 +1545,10 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } // Rule #2 baseCols := expression.ExtractColumns(odrItem.Expr) + if len(baseCols) == 0 { + // order by item has no reference of base col, like order by abs(1) and rand() + continue + } colSet := fd.NewFastIntSet() for _, col := range baseCols { colSet.Insert(int(col.UniqueID)) @@ -4123,7 +4145,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L var oldLen int // According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html, // we can only process window functions after having clause, so `considerWindow` is false now. - p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, false, sel.OrderBy != nil) + p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil) if err != nil { return nil, err } @@ -4161,7 +4183,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L // In such case plan `p` is not changed, so we don't have to build another projection. if hasWindowFuncField { // Now we build the window function fields. - p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, true, false) + p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false) if err != nil { return nil, err } diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index fe869760202cd..04d9ae146542d 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -15,6 +15,7 @@ package core import ( + "fmt" "math" "github.com/pingcap/tidb/expression" @@ -198,6 +199,11 @@ func (p *LogicalJoin) extractFDForSemiJoin(filtersFromApply []expression.Express // 1: since semi join will keep the part or all rows of the outer table, it's outer FD can be saved. // 2: the un-projected column will be left for the upper layer projection or already be pruned from bottom up. outerFD, _ := p.children[0].ExtractFD(), p.children[1].ExtractFD() + outerAcrossBlock := p.SelectBlockOffset() != p.children[0].SelectBlockOffset() + if outerAcrossBlock { + outerFD.HasAggBuilt = false + outerFD.GroupByCols.Clear() + } fds := outerFD eqCondSlice := expression.ScalarFuncs2Exprs(p.EqualConditions) @@ -215,6 +221,11 @@ func (p *LogicalJoin) extractFDForSemiJoin(filtersFromApply []expression.Express func (p *LogicalJoin) extractFDForInnerJoin(filtersFromApply []expression.Expression) *fd.FDSet { leftFD, rightFD := p.children[0].ExtractFD(), p.children[1].ExtractFD() + leftAcrossBlock, rightAcrossBlock := p.SelectBlockOffset() != p.children[0].SelectBlockOffset(), p.SelectBlockOffset() != p.children[1].SelectBlockOffset() + if leftAcrossBlock { + leftFD.HasAggBuilt = false + leftFD.GroupByCols.Clear() + } fds := leftFD fds.MakeCartesianProduct(rightFD) @@ -245,16 +256,19 @@ func (p *LogicalJoin) extractFDForInnerJoin(filtersFromApply []expression.Expres fds.HashCodeToUniqueID[k] = v } } - for i, ok := rightFD.GroupByCols.Next(0); ok; i, ok = rightFD.GroupByCols.Next(i + 1) { - fds.GroupByCols.Insert(i) + if !rightAcrossBlock { + for i, ok := rightFD.GroupByCols.Next(0); ok; i, ok = rightFD.GroupByCols.Next(i + 1) { + fds.GroupByCols.Insert(i) + } + fds.HasAggBuilt = fds.HasAggBuilt || rightFD.HasAggBuilt } - fds.HasAggBuilt = fds.HasAggBuilt || rightFD.HasAggBuilt p.fdSet = fds return fds } func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expression) *fd.FDSet { outerFD, innerFD := p.children[0].ExtractFD(), p.children[1].ExtractFD() + outerAcrossBlock, innerAcrossBlock := p.SelectBlockOffset() != p.children[0].SelectBlockOffset(), p.SelectBlockOffset() != p.children[1].SelectBlockOffset() innerCondition := p.RightConditions outerCondition := p.LeftConditions outerCols, innerCols := fd.NewFastIntSet(), fd.NewFastIntSet() @@ -266,6 +280,7 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres } if p.JoinType == RightOuterJoin { innerFD, outerFD = outerFD, innerFD + outerAcrossBlock, innerAcrossBlock = innerAcrossBlock, outerAcrossBlock innerCondition = p.LeftConditions outerCondition = p.RightConditions innerCols, outerCols = outerCols, innerCols @@ -346,9 +361,12 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres } } } - + if outerAcrossBlock { + outerFD.HasAggBuilt = false + outerFD.GroupByCols.Clear() + } fds := outerFD - fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt) + fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt, innerAcrossBlock) p.fdSet = fds return fds } @@ -966,7 +984,7 @@ func (p *LogicalSelection) ExtractFD() *fd.FDSet { // join's schema will miss t2.a while join.full schema has. since selection // itself doesn't contain schema, extracting schema should tell them apart. var columns []*expression.Column - if join, ok := p.children[0].(*LogicalJoin); ok { + if join, ok := p.children[0].(*LogicalJoin); ok && join.fullSchema != nil { columns = join.fullSchema.Columns } else { columns = p.Schema().Columns @@ -1059,11 +1077,30 @@ func (la *LogicalApply) ExtractFD() *fd.FDSet { } } } + // select (select t1.a from t1 where t1.rid = t2.id), count(t2.b) from t2 group by (t2.id) + // for correlated scalar sub-query, the whole sub-query will be projected as a new column for example X here. + // while for every same t2.id, this sub-query's scalar output must be the same, actually it's a kind of strict FD here. + applyStrictDetermine := fd.NewFastIntSet() + applyStrictDependency := fd.NewFastIntSet() + if innerPlan.Schema().Len() == 1 { + // single column in apply join inner side will be output directly. + for _, cc := range deduplicateCorrelatedCols { + applyStrictDetermine.Insert(int(cc.UniqueID)) + } + applyStrictDependency.Insert(int(innerPlan.Schema().Columns[0].UniqueID)) + + } else { + // multi columns will be wrapped as a new row func, actually errors like: Operand should contain 1 column(s) + } + switch la.JoinType { case InnerJoin: return la.extractFDForInnerJoin(eqCond) case LeftOuterJoin, RightOuterJoin: - return la.extractFDForOuterJoin(eqCond) + fds := la.extractFDForOuterJoin(eqCond) + fds.AddStrictFunctionalDependency(applyStrictDetermine, applyStrictDependency) + fmt.Println("cao: from", applyStrictDetermine.String(), "to", applyStrictDependency) + return fds case SemiJoin: return la.extractFDForSemiJoin(eqCond) default: diff --git a/planner/core/plan.go b/planner/core/plan.go index 44cb793862800..216dc4c3c1654 100644 --- a/planner/core/plan.go +++ b/planner/core/plan.go @@ -386,8 +386,13 @@ func (p *baseLogicalPlan) ExtractFD() *fd.FDSet { return p.fdSet } fds := &fd.FDSet{HashCodeToUniqueID: make(map[string]int)} + // isolation between different logical selection blocks. + acrossBlock := false for _, ch := range p.children { - fds.AddFrom(ch.ExtractFD()) + if p.SelectBlockOffset() != ch.SelectBlockOffset() { + acrossBlock = true + } + fds.AddFrom(ch.ExtractFD(), acrossBlock) } return fds } diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go index 22c90bcab4411..243f2eb4f8043 100644 --- a/planner/funcdep/fd_graph.go +++ b/planner/funcdep/fd_graph.go @@ -57,6 +57,7 @@ type FDSet struct { // // why not just makeNotNull of them, because even a non-equiv-related inner col can also // refuse supplied null values. + // todo: when multi join and across select block, this may need to be maintained more precisely. Rule333Equiv struct { Edges []*fdEdge InnerCols FastIntSet @@ -672,14 +673,14 @@ func (s *FDSet) MakeCartesianProduct(rhs *FDSet) { // - If the right side has no row, we would supply null-extended rows, then the value of any column is NULL, the equivalence class exists. // - If the right side has rows, no row is filtered out after the filters since no row of the outer side is filtered out. Hence, the equivalence class is still remained. // -func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols FastIntSet, opt *ArgOpts) { +func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols FastIntSet, opt *ArgOpts, innerAcrossBlock bool) { // copy down the left PK and right PK before the s has changed for later usage. leftPK, ok1 := s.FindPrimaryKey() rightPK, ok2 := innerFDs.FindPrimaryKey() copyLeftFDSet := &FDSet{} - copyLeftFDSet.AddFrom(s) + copyLeftFDSet.AddFrom(s, false) copyRightFDSet := &FDSet{} - copyRightFDSet.AddFrom(innerFDs) + copyRightFDSet.AddFrom(innerFDs, false) for _, edge := range innerFDs.fdEdges { // Rule #2.2, constant FD are removed from right side of left join. @@ -816,10 +817,12 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F s.HashCodeToUniqueID[k] = v } } - for i, ok := innerFDs.GroupByCols.Next(0); ok; i, ok = innerFDs.GroupByCols.Next(i + 1) { - s.GroupByCols.Insert(i) + if !innerAcrossBlock { + for i, ok := innerFDs.GroupByCols.Next(0); ok; i, ok = innerFDs.GroupByCols.Next(i + 1) { + s.GroupByCols.Insert(i) + } + s.HasAggBuilt = s.HasAggBuilt || innerFDs.HasAggBuilt } - s.HasAggBuilt = s.HasAggBuilt || innerFDs.HasAggBuilt } func (s *FDSet) MakeRestoreRule333() { @@ -879,7 +882,7 @@ func (s FDSet) AllCols() FastIntSet { // AddFrom merges two FD sets by adding each FD from the given set to this set. // Since two different tables may have some column ID overlap, we better use // column unique ID to build the FDSet instead. -func (s *FDSet) AddFrom(fds *FDSet) { +func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) { for i := range fds.fdEdges { fd := fds.fdEdges[i] if fd.equiv { @@ -903,10 +906,12 @@ func (s *FDSet) AddFrom(fds *FDSet) { s.HashCodeToUniqueID[k] = v } } - for i, ok := fds.GroupByCols.Next(0); ok; i, ok = fds.GroupByCols.Next(i + 1) { - s.GroupByCols.Insert(i) + if !acrossBlock { + for i, ok := fds.GroupByCols.Next(0); ok; i, ok = fds.GroupByCols.Next(i + 1) { + s.GroupByCols.Insert(i) + } + s.HasAggBuilt = fds.HasAggBuilt } - s.HasAggBuilt = fds.HasAggBuilt s.Rule333Equiv = fds.Rule333Equiv } From e2f712f20e3f18af577be686b8204820f80b668d Mon Sep 17 00:00:00 2001 From: ailinkid <314806019@qq.com> Date: Wed, 23 Mar 2022 17:18:34 +0800 Subject: [PATCH 04/13] fix order by should see new p.schema after order item is built Signed-off-by: ailinkid <314806019@qq.com> --- planner/core/logical_plan_builder.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index c56547df032c8..f6be15ad021a1 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1321,6 +1321,8 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } } proj.SetChildren(p) + var resultP LogicalPlan + resultP = proj // delay the only-full-group-by-check in create view statement to later query. var orderByItemExprs []*util.ByItems if !b.isCreateView { @@ -1438,8 +1440,11 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a default: } } + if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT SUM(a) FROM t1 ORDER BY (SELECT") { + fmt.Println(1) + } // check only-full-group-by 4 order-by clause with group-by clause - orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) + resultP, orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) if err != nil { return nil, nil, 0, err } @@ -1586,7 +1591,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } } } - return proj, orderByItemExprs, oldLen, nil + return resultP, orderByItemExprs, oldLen, nil } func (b *PlanBuilder) buildDistinct(child LogicalPlan, length int) (*LogicalAggregation, error) { @@ -1986,7 +1991,7 @@ func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*a } func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, - windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) ([]*util.ByItems, error) { + windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) (LogicalPlan, []*util.ByItems, error) { exprs := make([]*util.ByItems, 0, len(byItems)) transformer := &itemTransformer{} for _, item := range byItems { @@ -1994,7 +1999,7 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt item.Expr = newExpr.(ast.ExprNode) it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil) if err != nil { - return nil, err + return nil, nil, err } // check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 @@ -2008,7 +2013,7 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt p = np exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc}) } - return exprs, nil + return p, exprs, nil } func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int, @@ -3368,7 +3373,7 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt } else { // check only-full-group-by 4 order-by clause without group-by clause // Step1: judge whether this query is an aggregated query. - // Step2: there is a agg in order by while sql is not an aggregated query, errors. + // Step2: there is an agg in order by while sql is not an aggregated query, errors. // Since here doesn't require FD to check, this can take place as soon as possible. // Step1: isNonAggregatedQuery := false From 20e1a234539938592dd52a2eca6761eb71c2e71c Mon Sep 17 00:00:00 2001 From: ailinkid <314806019@qq.com> Date: Thu, 24 Mar 2022 17:18:22 +0800 Subject: [PATCH 05/13] fix corrleated column in order by clause can't be name-resolved Signed-off-by: ailinkid <314806019@qq.com> --- planner/core/logical_plan_builder.go | 82 ++++++++++++++++++---- planner/funcdep/fd_graph_ported_test.go | 2 +- planner/funcdep/only_full_group_by_test.go | 14 ++++ 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index f6be15ad021a1..0d52e583e1b61 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1440,9 +1440,6 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a default: } } - if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT SUM(a) FROM t1 ORDER BY (SELECT") { - fmt.Println(1) - } // check only-full-group-by 4 order-by clause with group-by clause resultP, orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) if err != nil { @@ -3377,38 +3374,60 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt // Since here doesn't require FD to check, this can take place as soon as possible. // Step1: isNonAggregatedQuery := false - resolver := colResolverForOnlyFullGroupBy{ + resolver1 := colResolverForOnlyFullGroupBy{ firstOrderByAggColIdx: -1, } - resolver.curClause = fieldList + resolver1.curClause = fieldList for idx, field := range sel.Fields.Fields { - resolver.exprIdx = idx - field.Accept(&resolver) + resolver1.exprIdx = idx + field.Accept(&resolver1) } if sel.Having != nil { - sel.Having.Expr.Accept(&resolver) + sel.Having.Expr.Accept(&resolver1) } - if !resolver.hasAggFunc { + if !resolver1.hasAggFunc { isNonAggregatedQuery = true } // Step2: if sel.OrderBy != nil && isNonAggregatedQuery { + // Temporarily solve the case like: SELECT a FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2); + // We need to detect the correlated agg in order by clause, and report error in the outer query. + resolver2 := &correlatedAggregateResolver{ + ctx: ctx, + b: b, + outerPlan: p, + correlatedAggFuncs: make([]*ast.AggregateFuncExpr, 0, len(sel.OrderBy.Items)), + } // YES: SQL like `select a from t where a = 1 order by count(b)` is illegal. // NO: SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b)) is ok. // judge whether the aggregated func in order by item is referred to outer schema. - resolver.curClause = orderByClause - resolver.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items)) + resolver1.curClause = orderByClause + resolver1.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items)) for idx, byItem := range sel.OrderBy.Items { - resolver.exprIdx = idx - byItem.Expr.Accept(&resolver) + resolver1.exprIdx = idx + byItem.Expr.Accept(&resolver1) + + // detect the correlated agg in sub query. + _, ok := byItem.Expr.Accept(resolver2) + if !ok { + return resolver2.err + } + if len(resolver2.correlatedAggFuncs) > 0 { + for _, one := range resolver2.correlatedAggFuncs { + if _, ok := resolver1.orderByAggMap[one]; !ok { + resolver1.orderByAggMap[one] = idx + } + } + resolver2.correlatedAggFuncs = resolver2.correlatedAggFuncs[:0] + } } // has no order-by agg. - if len(resolver.orderByAggMap) == 0 { + if len(resolver1.orderByAggMap) == 0 { return nil } // has some order-by agg. - for k, v := range resolver.orderByAggMap { + for k, v := range resolver1.orderByAggMap { var cols []*expression.Column for _, arg := range k.Args { newArg, np, err := b.rewrite(ctx, arg, p, nil, true) @@ -3426,6 +3445,39 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1) } } + // we need extract all current-level-correlated columns from order by sub-query item to select fields if it has. + if sel.OrderBy != nil { + for _, byItem := range sel.OrderBy.Items { + if _, ok := byItem.Expr.(*ast.SubqueryExpr); ok { + // correlated agg will be extracted completely latter. + _, np, err := b.rewrite(ctx, byItem.Expr, p, nil, true) + if err != nil { + return err + } + correlatedCols := ExtractCorrelatedCols4LogicalPlan(np) + for _, cone := range correlatedCols { + for idx, pone := range p.Schema().Columns { + if cone.UniqueID == pone.UniqueID { + pname := p.OutputNames()[idx] + colName := &ast.ColumnName{ + Schema: pname.DBName, + Table: pname.TblName, + Name: pname.ColName, + } + sel.Fields.Fields = append(sel.Fields.Fields, &ast.SelectField{ + Auxiliary: true, + AuxiliaryColInAgg: true, + Expr: &ast.ColumnNameExpr{Name: colName}, + }) + } + } + } + // p = np, don't construct apply and substitute p here once there is a sub-query + // Because here is just like a pre-procession of pulling correlated column from sub-query to outer select fields. + // don't change the basic plan + } + } + } } return nil } diff --git a/planner/funcdep/fd_graph_ported_test.go b/planner/funcdep/fd_graph_ported_test.go index 3ac8371e98c7f..939617e62d128 100644 --- a/planner/funcdep/fd_graph_ported_test.go +++ b/planner/funcdep/fd_graph_ported_test.go @@ -65,7 +65,7 @@ func TestFuncDeps_ColsAreKey(t *testing.T) { loj = *abcde loj.MakeCartesianProduct(mnpq) loj.AddConstants(NewFastIntSet(3)) - loj.MakeOuterJoin(&FDSet{}, &FDSet{}, preservedCols, nullExtendedCols, nil) + loj.MakeOuterJoin(&FDSet{}, &FDSet{}, preservedCols, nullExtendedCols, nil, false) loj.AddEquivalence(NewFastIntSet(1), NewFastIntSet(10)) testcases := []struct { diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go index b519436beabff..4749b29786e97 100644 --- a/planner/funcdep/only_full_group_by_test.go +++ b/planner/funcdep/only_full_group_by_test.go @@ -230,4 +230,18 @@ func TestOnlyFullGroupByOldCases(t *testing.T) { err = tk.ExecToErr("select b from t1 group by a") require.NotNil(t, err) require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t1.b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by") + + // fix issue 30025 + tk.MustExec("drop table if exists t1,t2;") + tk.MustExec("CREATE TABLE t1(a INTEGER);") + tk.MustExec("INSERT INTO t1 VALUES (1), (2);") + err = tk.ExecToErr("SELECT a FROM t1 ORDER BY (SELECT COUNT(t1.a) FROM t1 AS t2);") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[planner:3029]Expression #1 of ORDER BY contains aggregate function and applies to the result of a non-aggregated query") + // from sql standard and what postgres does, this query should report more-than-one-row error in execution phase. + // but mysql has done a special check for it, since outer query has exactly only one row, group by clause can be eliminated (so order-by scalar sub-query check is bypassed) + err = tk.QueryToErr("SELECT SUM(a) FROM t1 ORDER BY (SELECT COUNT(t1.a) FROM t1 AS t2);") + require.NotNil(t, err) + require.Equal(t, err.Error(), "[executor:1242]Subquery returns more than 1 row") + tk.MustQuery("SELECT SUM(a) FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2);") } From 13da49a11da6c8c3ea411ef290080fff2ec420c3 Mon Sep 17 00:00:00 2001 From: ailinkid <314806019@qq.com> Date: Thu, 24 Mar 2022 20:03:35 +0800 Subject: [PATCH 06/13] move order by check logical to buildOrderBy Signed-off-by: ailinkid <314806019@qq.com> --- planner/core/logical_plan_builder.go | 435 ++++++++++++--------------- planner/core/logical_plans.go | 16 +- planner/core/plan.go | 3 +- 3 files changed, 205 insertions(+), 249 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 0d52e583e1b61..a0ffeb5128e24 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1236,11 +1236,11 @@ func findColFromNaturalUsingJoin(p LogicalPlan, col *expression.Column) (name *t // buildProjection returns a Projection plan and non-aux columns length. func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, mapper map[*ast.AggregateFuncExpr]int, - windowMapper map[*ast.WindowFuncExpr]int, orderMapper map[*ast.AggregateFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) { + windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, int, error) { fields := sel.Fields.Fields err := b.preprocessUserVarTypes(ctx, p, fields, mapper) if err != nil { - return nil, nil, 0, err + return nil, 0, err } b.optFlag |= flagEliminateProjection b.curClause = fieldList @@ -1270,7 +1270,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a proj.Exprs = append(proj.Exprs, expr) col, name, err := b.buildProjectionField(ctx, p, field, expr) if err != nil { - return nil, nil, 0, err + return nil, 0, err } schema.Append(col) newNames = append(newNames, name) @@ -1278,7 +1278,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } newExpr, np, err := b.rewriteWithPreprocess(ctx, field.Expr, p, mapper, windowMapper, true, nil) if err != nil { - return nil, nil, 0, err + return nil, 0, err } // For window functions in the order by clause, we will append an field for it. @@ -1294,7 +1294,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a col, name, err := b.buildProjectionField(ctx, p, field, newExpr) if err != nil { - return nil, nil, 0, err + return nil, 0, err } schema.Append(col) newNames = append(newNames, name) @@ -1321,10 +1321,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } } proj.SetChildren(p) - var resultP LogicalPlan - resultP = proj // delay the only-full-group-by-check in create view statement to later query. - var orderByItemExprs []*util.ByItems if !b.isCreateView { fds := proj.ExtractFD() // Projection -> Children -> ... @@ -1400,195 +1397,15 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a } // Only1Zero is to judge whether it's no-group-by-items case. if !fds.GroupByCols.Only1Zero() { - return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name) + return nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name) } else { - return nil, nil, 0, ErrMixOfGroupFuncAndFields.GenWithStackByArgs(offset+1, name) - } - } - if fds.GroupByCols.Only1Zero() { - // maxOneRow is delayed from agg's ExtractFD logic since some details listed in it. - projectionUniqueIDs := fd.NewFastIntSet() - for _, expr := range proj.Exprs { - switch x := expr.(type) { - case *expression.Column: - projectionUniqueIDs.Insert(int(x.UniqueID)) - case *expression.ScalarFunction: - scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) - if !ok { - panic("selected expr must have been registered, shouldn't be here") - } - projectionUniqueIDs.Insert(scalarUniqueID) - } - } - fds.MaxOneRow(projectionUniqueIDs) - } - // for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag. - fds.HasAggBuilt = false - } - if sel.OrderBy != nil { - selectExprsUniqueIDs := fd.NewFastIntSet() - for _, expr := range proj.Exprs[:oldLen] { - switch x := expr.(type) { - case *expression.Column: - selectExprsUniqueIDs.Insert(int(x.UniqueID)) - case *expression.ScalarFunction: - scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) - if !ok { - panic("selected expr must have been registered, shouldn't be here") - } - selectExprsUniqueIDs.Insert(scalarUniqueID) - default: - } - } - // check only-full-group-by 4 order-by clause with group-by clause - resultP, orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct) - if err != nil { - return nil, nil, 0, err - } - if sel.GroupBy != nil { - for offset, odrItem := range orderByItemExprs { - item := fd.NewFastIntSet() - switch x := odrItem.Expr.(type) { - case *expression.Column: - item.Insert(int(x.UniqueID)) - case *expression.ScalarFunction: - // order by item may not be projected as a column in projection, allocated a new one - scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) - if !ok { - // panic("selected expr must have been registered, shouldn't be here") - scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) - } - item.Insert(scalarUniqueID) - case *expression.Constant: - // order by null/false/true can always be ok, let item be empty. - default: - } - // Step#1: whether order by item is in the origin select field list - if item.SubsetOf(selectExprsUniqueIDs) { - continue - } - // Step#2: whether order by item is in the group by list - if item.SubsetOf(fds.GroupByCols) { - continue - } - // Step#3: whether order by item is in the FD closure of group-by & select list items. - if item.SubsetOf(fds.ConstantCols()) { - continue - } - strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols) - if item.SubsetOf(strictClosureOfGroupByCols) { - continue - } - strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs) - if item.SubsetOf(strictClosureOfSelectCols) { - continue - } - // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. - baseCols := expression.ExtractColumns(odrItem.Expr) - if len(baseCols) == 0 { - // order by item has no reference of base col, like order by abs(1) and rand() - continue - } - // GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely. - baseColsUniqueIDs := fd.NewFastIntSet() - for _, bc := range baseCols { - baseColsUniqueIDs.Insert(int(bc.UniqueID)) - } - if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) { - continue - } - if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) { - continue - } - errShowCol := baseCols[0] - for _, col := range baseCols { - colSet := fd.NewFastIntSet(int(col.UniqueID)) - if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { - errShowCol = col - break - } - } - // better use the schema alias name firstly if any. - name := "" - for idx, schemaCol := range proj.Schema().Columns { - if schemaCol.UniqueID == errShowCol.UniqueID { - name = proj.names[idx].String() - break - } - } - if name == "" { - name = errShowCol.OrigName - } - return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name) - } - } - if sel.Distinct { - // order-by with distinct case - // Rule #1: order by item should be in the select filed list - // Rule #2: the base col that order by item dependent on should be in the select field list - for offset, odrItem := range orderByItemExprs { - item := fd.NewFastIntSet() - switch x := odrItem.Expr.(type) { - case *expression.Column: - item.Insert(int(x.UniqueID)) - case *expression.ScalarFunction: - // order by item may not be projected as a column in projection, allocated a new one - scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) - if !ok { - // panic("selected expr must have been registered, shouldn't be here") - scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) - } - item.Insert(scalarUniqueID) - default: - } - // Rule #1 - if item.SubsetOf(selectExprsUniqueIDs) { - continue - } - // Rule #2 - baseCols := expression.ExtractColumns(odrItem.Expr) - if len(baseCols) == 0 { - // order by item has no reference of base col, like order by abs(1) and rand() - continue - } - colSet := fd.NewFastIntSet() - for _, col := range baseCols { - colSet.Insert(int(col.UniqueID)) - } - if colSet.SubsetOf(selectExprsUniqueIDs) { - continue - } - // find that error base col - errShowCol := baseCols[0] - for _, col := range baseCols { - colSet := fd.NewFastIntSet() - colSet.Insert(int(col.UniqueID)) - if !colSet.SubsetOf(selectExprsUniqueIDs) { - errShowCol = col - break - } - } - // better use the schema alias name firstly if any. - name := "" - for idx, schemaCol := range proj.Schema().Columns { - if schemaCol.UniqueID == errShowCol.UniqueID { - name = proj.names[idx].String() - break - } - } - if name == "" { - name = errShowCol.OrigName - } - if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok { - return nil, nil, 0, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT") - } - // select distinct count(a) from t group by b order by sum(a); ✗ - return nil, nil, 0, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT") + return nil, 0, ErrMixOfGroupFuncAndFields.GenWithStackByArgs(offset+1, name) } } } + proj.FDChecked = true } - return resultP, orderByItemExprs, oldLen, nil + return proj, oldLen, nil } func (b *PlanBuilder) buildDistinct(child LogicalPlan, length int) (*LogicalAggregation, error) { @@ -1983,12 +1800,8 @@ func (t *itemTransformer) Leave(inNode ast.Node) (ast.Node, bool) { return inNode, false } -func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int) (*LogicalSort, error) { - return b.buildSortWithCheck(ctx, p, byItems, aggMapper, windowMapper, nil, 0, false) -} - func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, - windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) (LogicalPlan, []*util.ByItems, error) { + windowMapper map[*ast.WindowFuncExpr]int) (LogicalPlan, []*util.ByItems, error) { exprs := make([]*util.ByItems, 0, len(byItems)) transformer := &itemTransformer{} for _, item := range byItems { @@ -1998,65 +1811,203 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt if err != nil { return nil, nil, err } - - // check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 - //if hasDistinct && projExprs != nil { - // err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) - // if err != nil { - // return nil, err - // } - //} - p = np exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc}) } return p, exprs, nil } -func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int, - projExprs []expression.Expression, oldLen int, hasDistinct bool) (*LogicalSort, error) { +func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int) (*LogicalSort, error) { if _, isUnion := p.(*LogicalUnionAll); isUnion { b.curClause = globalOrderByClause } else { b.curClause = orderByClause } - sort := LogicalSort{}.Init(b.ctx, b.getSelectOffset()) - exprs := make([]*util.ByItems, 0, len(byItems)) - transformer := &itemTransformer{} - for i, item := range byItems { - newExpr, _ := item.Expr.Accept(transformer) - item.Expr = newExpr.(ast.ExprNode) - it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil) - if err != nil { - return nil, err - } - - // check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 - if hasDistinct && projExprs != nil { - err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) - if err != nil { - return nil, err - } - } - - p = np - exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc}) + pSort := LogicalSort{}.Init(b.ctx, b.getSelectOffset()) + p, orderByItemExprs, err := b.buildOrderByItems(ctx, p, byItems, aggMapper, windowMapper) + if err != nil { + return nil, err } - sort.ByItems = exprs - sort.SetChildren(p) - return sort, nil + pSort.ByItems = orderByItemExprs + pSort.SetChildren(p) + return pSort, nil } -func (b *PlanBuilder) buildSortWithCheckV2(p LogicalPlan, byItems []*util.ByItems) (*LogicalSort, error) { +func (b *PlanBuilder) buildSortCheck(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, proj *LogicalProjection, oldLen int, orderMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int) (*LogicalSort, error) { if _, isUnion := p.(*LogicalUnionAll); isUnion { b.curClause = globalOrderByClause } else { b.curClause = orderByClause } - sort := LogicalSort{}.Init(b.ctx, b.getSelectOffset()) - sort.ByItems = byItems - sort.SetChildren(p) - return sort, nil + pSort := LogicalSort{}.Init(b.ctx, b.getSelectOffset()) + p, orderByItemExprs, err := b.buildOrderByItems(ctx, p, sel.OrderBy.Items, orderMapper, windowMapper) + if err != nil { + return nil, err + } + if !b.isCreateView { + // check only-full-group-by 4 order-by clause with group-by clause + fds := proj.ExtractFD() + if sel.OrderBy != nil { + selectExprsUniqueIDs := fd.NewFastIntSet() + for _, expr := range proj.Exprs[:oldLen] { + switch x := expr.(type) { + case *expression.Column: + selectExprsUniqueIDs.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + panic("selected expr must have been registered, shouldn't be here") + } + selectExprsUniqueIDs.Insert(scalarUniqueID) + default: + } + } + if sel.GroupBy != nil { + for offset, odrItem := range orderByItemExprs { + item := fd.NewFastIntSet() + switch x := odrItem.Expr.(type) { + case *expression.Column: + item.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + // order by item may not be projected as a column in projection, allocated a new one + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + // panic("selected expr must have been registered, shouldn't be here") + scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) + } + item.Insert(scalarUniqueID) + case *expression.Constant: + // order by null/false/true can always be ok, let item be empty. + default: + } + // Step#1: whether order by item is in the origin select field list + if item.SubsetOf(selectExprsUniqueIDs) { + continue + } + // Step#2: whether order by item is in the group by list + if item.SubsetOf(fds.GroupByCols) { + continue + } + // Step#3: whether order by item is in the FD closure of group-by & select list items. + if item.SubsetOf(fds.ConstantCols()) { + continue + } + strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols) + if item.SubsetOf(strictClosureOfGroupByCols) { + continue + } + strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs) + if item.SubsetOf(strictClosureOfSelectCols) { + continue + } + // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. + baseCols := expression.ExtractColumns(odrItem.Expr) + if len(baseCols) == 0 { + // order by item has no reference of base col, like order by abs(1) and rand() + continue + } + // GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely. + baseColsUniqueIDs := fd.NewFastIntSet() + for _, bc := range baseCols { + baseColsUniqueIDs.Insert(int(bc.UniqueID)) + } + if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) { + continue + } + if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) { + continue + } + errShowCol := baseCols[0] + for _, col := range baseCols { + colSet := fd.NewFastIntSet(int(col.UniqueID)) + if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { + errShowCol = col + break + } + } + // better use the schema alias name firstly if any. + name := "" + for idx, schemaCol := range proj.Schema().Columns { + if schemaCol.UniqueID == errShowCol.UniqueID { + name = proj.names[idx].String() + break + } + } + if name == "" { + name = errShowCol.OrigName + } + return nil, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name) + } + } + if sel.Distinct { + // order-by with distinct case + // Rule #1: order by item should be in the select filed list + // Rule #2: the base col that order by item dependent on should be in the select field list + for offset, odrItem := range orderByItemExprs { + item := fd.NewFastIntSet() + switch x := odrItem.Expr.(type) { + case *expression.Column: + item.Insert(int(x.UniqueID)) + case *expression.ScalarFunction: + // order by item may not be projected as a column in projection, allocated a new one + scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) + if !ok { + // panic("selected expr must have been registered, shouldn't be here") + scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) + } + item.Insert(scalarUniqueID) + default: + } + // Rule #1 + if item.SubsetOf(selectExprsUniqueIDs) { + continue + } + // Rule #2 + baseCols := expression.ExtractColumns(odrItem.Expr) + if len(baseCols) == 0 { + // order by item has no reference of base col, like order by abs(1) and rand() + continue + } + colSet := fd.NewFastIntSet() + for _, col := range baseCols { + colSet.Insert(int(col.UniqueID)) + } + if colSet.SubsetOf(selectExprsUniqueIDs) { + continue + } + // find that error base col + errShowCol := baseCols[0] + for _, col := range baseCols { + colSet := fd.NewFastIntSet() + colSet.Insert(int(col.UniqueID)) + if !colSet.SubsetOf(selectExprsUniqueIDs) { + errShowCol = col + break + } + } + // better use the schema alias name firstly if any. + name := "" + for idx, schemaCol := range proj.Schema().Columns { + if schemaCol.UniqueID == errShowCol.UniqueID { + name = proj.names[idx].String() + break + } + } + if name == "" { + name = errShowCol.OrigName + } + if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok { + return nil, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT") + } + // select distinct count(a) from t group by b order by sum(a); ✗ + return nil, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT") + } + } + } + } + pSort.ByItems = orderByItemExprs + pSort.SetChildren(p) + return pSort, nil } // checkOrderByInDistinct checks whether ORDER BY has conflicts with DISTINCT, see #12442 @@ -4053,7 +4004,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L windowAggMap map[*ast.AggregateFuncExpr]int correlatedAggMap map[*ast.AggregateFuncExpr]int gbyCols []expression.Expression - orderByItems []*util.ByItems + proj LogicalPlan ) // set for update read to true before building result set node @@ -4202,7 +4153,8 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L var oldLen int // According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html, // we can only process window functions after having clause, so `considerWindow` is false now. - p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil) + proj, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil) + p = proj if err != nil { return nil, err } @@ -4240,7 +4192,8 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L // In such case plan `p` is not changed, so we don't have to build another projection. if hasWindowFuncField { // Now we build the window function fields. - p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false) + proj, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false) + p = proj if err != nil { return nil, err } @@ -4256,7 +4209,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L if sel.OrderBy != nil { if b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy() { - p, err = b.buildSortWithCheckV2(p, orderByItems) + p, err = b.buildSortCheck(ctx, p, sel, proj.(*LogicalProjection), oldLen, orderMap, windowMapper) } else { p, err = b.buildSort(ctx, p, sel.OrderBy.Items, orderMap, windowMapper) } diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 04d9ae146542d..44ae6760df5f3 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -15,7 +15,6 @@ package core import ( - "fmt" "math" "github.com/pingcap/tidb/expression" @@ -596,6 +595,13 @@ func (p *LogicalProjection) ExtractFD() *fd.FDSet { fds.MakeNotNull(notnullColsUniqueIDs) // select max(a) from t group by b, we should project both `a` & `b` to maintain the FD down here, even if select-fields only contain `a`. fds.ProjectCols(outputColsUniqueIDs.Union(fds.GroupByCols)) + if fds.HasAggBuilt && fds.GroupByCols.Only1Zero() && p.baseLogicalPlan.FDChecked { + // maxOneRow is delayed from agg's ExtractFD logic since some details listed in it. + fds.MaxOneRow(outputColsUniqueIDs) + // for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag. + fds.HasAggBuilt = false + fds.GroupByCols.Clear() + } // just trace it down in every operator for test checking. p.fdSet = fds return fds @@ -1078,19 +1084,16 @@ func (la *LogicalApply) ExtractFD() *fd.FDSet { } } // select (select t1.a from t1 where t1.rid = t2.id), count(t2.b) from t2 group by (t2.id) - // for correlated scalar sub-query, the whole sub-query will be projected as a new column for example X here. + // for correlated scalar sub-query, the whole sub-query will be projected as a new column for example here. // while for every same t2.id, this sub-query's scalar output must be the same, actually it's a kind of strict FD here. applyStrictDetermine := fd.NewFastIntSet() applyStrictDependency := fd.NewFastIntSet() - if innerPlan.Schema().Len() == 1 { + if innerPlan.Schema().Len() == 1 && len(deduplicateCorrelatedCols) > 0 { // single column in apply join inner side will be output directly. for _, cc := range deduplicateCorrelatedCols { applyStrictDetermine.Insert(int(cc.UniqueID)) } applyStrictDependency.Insert(int(innerPlan.Schema().Columns[0].UniqueID)) - - } else { - // multi columns will be wrapped as a new row func, actually errors like: Operand should contain 1 column(s) } switch la.JoinType { @@ -1099,7 +1102,6 @@ func (la *LogicalApply) ExtractFD() *fd.FDSet { case LeftOuterJoin, RightOuterJoin: fds := la.extractFDForOuterJoin(eqCond) fds.AddStrictFunctionalDependency(applyStrictDetermine, applyStrictDependency) - fmt.Println("cao: from", applyStrictDetermine.String(), "to", applyStrictDependency) return fds case SemiJoin: return la.extractFDForSemiJoin(eqCond) diff --git a/planner/core/plan.go b/planner/core/plan.go index 216dc4c3c1654..79c7615bb9a1c 100644 --- a/planner/core/plan.go +++ b/planner/core/plan.go @@ -377,7 +377,8 @@ type baseLogicalPlan struct { // including eliminating unnecessary DISTINCT operators, simplifying ORDER BY columns, // removing Max1Row operators, and mapping semi-joins to inner-joins. // for now, it's hard to maintain in individual operator, build it from bottom up when using. - fdSet *fd.FDSet + fdSet *fd.FDSet + FDChecked bool } // ExtractFD return the children[0]'s fdSet if there are no adding/removing fd in this logic plan. From 6fd8035a4a4c1b17185f5e66bd3ef82809edf6f0 Mon Sep 17 00:00:00 2001 From: arenatlx Date: Tue, 29 Mar 2022 23:29:03 +0800 Subject: [PATCH 07/13] fix order by for skipping extract correlated col Signed-off-by: arenatlx --- planner/core/logical_plan_builder.go | 77 ++++++++++++++-------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index a0ffeb5128e24..9321309bdf523 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -3325,18 +3325,18 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt // Since here doesn't require FD to check, this can take place as soon as possible. // Step1: isNonAggregatedQuery := false - resolver1 := colResolverForOnlyFullGroupBy{ + resolver4AggDetect := colResolverForOnlyFullGroupBy{ firstOrderByAggColIdx: -1, } - resolver1.curClause = fieldList + resolver4AggDetect.curClause = fieldList for idx, field := range sel.Fields.Fields { - resolver1.exprIdx = idx - field.Accept(&resolver1) + resolver4AggDetect.exprIdx = idx + field.Accept(&resolver4AggDetect) } if sel.Having != nil { - sel.Having.Expr.Accept(&resolver1) + sel.Having.Expr.Accept(&resolver4AggDetect) } - if !resolver1.hasAggFunc { + if !resolver4AggDetect.hasAggFunc { isNonAggregatedQuery = true } @@ -3344,7 +3344,7 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt if sel.OrderBy != nil && isNonAggregatedQuery { // Temporarily solve the case like: SELECT a FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2); // We need to detect the correlated agg in order by clause, and report error in the outer query. - resolver2 := &correlatedAggregateResolver{ + resolver4CorAggDetect := &correlatedAggregateResolver{ ctx: ctx, b: b, outerPlan: p, @@ -3353,47 +3353,45 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt // YES: SQL like `select a from t where a = 1 order by count(b)` is illegal. // NO: SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b)) is ok. // judge whether the aggregated func in order by item is referred to outer schema. - resolver1.curClause = orderByClause - resolver1.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items)) + resolver4AggDetect.curClause = orderByClause + resolver4AggDetect.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items)) for idx, byItem := range sel.OrderBy.Items { - resolver1.exprIdx = idx - byItem.Expr.Accept(&resolver1) + resolver4AggDetect.exprIdx = idx + byItem.Expr.Accept(&resolver4AggDetect) // detect the correlated agg in sub query. - _, ok := byItem.Expr.Accept(resolver2) + _, ok := byItem.Expr.Accept(resolver4CorAggDetect) if !ok { - return resolver2.err + return resolver4CorAggDetect.err } - if len(resolver2.correlatedAggFuncs) > 0 { - for _, one := range resolver2.correlatedAggFuncs { - if _, ok := resolver1.orderByAggMap[one]; !ok { - resolver1.orderByAggMap[one] = idx + if len(resolver4CorAggDetect.correlatedAggFuncs) > 0 { + for _, one := range resolver4CorAggDetect.correlatedAggFuncs { + if _, ok := resolver4AggDetect.orderByAggMap[one]; !ok { + resolver4AggDetect.orderByAggMap[one] = idx } } - resolver2.correlatedAggFuncs = resolver2.correlatedAggFuncs[:0] + resolver4CorAggDetect.correlatedAggFuncs = resolver4CorAggDetect.correlatedAggFuncs[:0] } } - // has no order-by agg. - if len(resolver1.orderByAggMap) == 0 { - return nil - } // has some order-by agg. - for k, v := range resolver1.orderByAggMap { - var cols []*expression.Column - for _, arg := range k.Args { - newArg, np, err := b.rewrite(ctx, arg, p, nil, true) - if err != nil { - return err + if len(resolver4AggDetect.orderByAggMap) != 0 { + for k, v := range resolver4AggDetect.orderByAggMap { + var cols []*expression.Column + for _, arg := range k.Args { + newArg, np, err := b.rewrite(ctx, arg, p, nil, true) + if err != nil { + return err + } + p = np + cols = append(cols, expression.ExtractColumns(newArg)...) } - p = np - cols = append(cols, expression.ExtractColumns(newArg)...) - } - // use outer logical query block correlated cols --- valid - if len(cols) == 0 { - continue + // use outer logical query block correlated cols --- valid + if len(cols) == 0 { + continue + } + // use logical query block cols --- invalid + return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1) } - // use logical query block cols --- invalid - return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1) } } // we need extract all current-level-correlated columns from order by sub-query item to select fields if it has. @@ -3416,7 +3414,8 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt Name: pname.ColName, } sel.Fields.Fields = append(sel.Fields.Fields, &ast.SelectField{ - Auxiliary: true, + Auxiliary: true, + // todo: not sure here, it should be unified with correlatedAggExtractor! AuxiliaryColInAgg: true, Expr: &ast.ColumnNameExpr{Name: colName}, }) @@ -4153,7 +4152,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L var oldLen int // According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html, // we can only process window functions after having clause, so `considerWindow` is false now. - proj, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil) + proj, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, false, sel.OrderBy != nil) p = proj if err != nil { return nil, err @@ -4192,7 +4191,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L // In such case plan `p` is not changed, so we don't have to build another projection. if hasWindowFuncField { // Now we build the window function fields. - proj, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false) + proj, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, true, false) p = proj if err != nil { return nil, err From 61bc37a0069048bab429e4b8f7ab8774265bcc99 Mon Sep 17 00:00:00 2001 From: arenatlx Date: Tue, 29 Mar 2022 23:38:35 +0800 Subject: [PATCH 08/13] add addFrom commnet Signed-off-by: arenatlx --- planner/funcdep/fd_graph.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go index 243f2eb4f8043..b0f91a37f55ed 100644 --- a/planner/funcdep/fd_graph.go +++ b/planner/funcdep/fd_graph.go @@ -882,6 +882,11 @@ func (s FDSet) AllCols() FastIntSet { // AddFrom merges two FD sets by adding each FD from the given set to this set. // Since two different tables may have some column ID overlap, we better use // column unique ID to build the FDSet instead. +// +// AddFrom is commonly used to pull underlining FD up to current operator. Since +// some ancillary factors for FD like hasAggBuild and GroupCols is strictly limited +// into an exact scope --- that logical query block, but FD doesn't. So we should +// clean that stuff when pulling FD across logical query blocks. func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) { for i := range fds.fdEdges { fd := fds.fdEdges[i] From 19d47b0813d907a9917ca0f59bffe5f65a010556 Mon Sep 17 00:00:00 2001 From: arenatlx Date: Tue, 29 Mar 2022 23:41:39 +0800 Subject: [PATCH 09/13] . Signed-off-by: arenatlx --- planner/core/plan.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/plan.go b/planner/core/plan.go index 79c7615bb9a1c..0159b400088b5 100644 --- a/planner/core/plan.go +++ b/planner/core/plan.go @@ -387,7 +387,7 @@ func (p *baseLogicalPlan) ExtractFD() *fd.FDSet { return p.fdSet } fds := &fd.FDSet{HashCodeToUniqueID: make(map[string]int)} - // isolation between different logical selection blocks. + // isolation between different logical query blocks. acrossBlock := false for _, ch := range p.children { if p.SelectBlockOffset() != ch.SelectBlockOffset() { From 34ee6d5e4b1cbfd522aceb9742b5a729ab3a212d Mon Sep 17 00:00:00 2001 From: arenatlx Date: Wed, 30 Mar 2022 11:15:27 +0800 Subject: [PATCH 10/13] add test Signed-off-by: arenatlx --- planner/funcdep/only_full_group_by_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go index 4749b29786e97..1edd739fa9c2b 100644 --- a/planner/funcdep/only_full_group_by_test.go +++ b/planner/funcdep/only_full_group_by_test.go @@ -244,4 +244,12 @@ func TestOnlyFullGroupByOldCases(t *testing.T) { require.NotNil(t, err) require.Equal(t, err.Error(), "[executor:1242]Subquery returns more than 1 row") tk.MustQuery("SELECT SUM(a) FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2);") + + // fix issue 26945 + tk.MustExec("drop table if exists t1,t2") + tk.MustExec("create table t1(a int, b int)") + tk.MustExec("create table t2(a int, b int)") + tk.MustExec("insert into t1 values(1,1)") + tk.MustExec("insert into t2 values(1,1)") + tk.MustQuery("select one.a from t1 one order by (select two.b from t2 two where two.a = one.b)").Check(testkit.Rows("1")) } From 2d18cc8e0861f1b479e25e471b8929b997399242 Mon Sep 17 00:00:00 2001 From: AilinKid <314806019@qq.com> Date: Wed, 20 Apr 2022 16:58:06 +0800 Subject: [PATCH 11/13] fix-order-by-fd2 Signed-off-by: AilinKid <314806019@qq.com> --- planner/core/logical_plans.go | 19 +-- planner/funcdep/doc.go | 151 +++++++++++++++++++++ planner/funcdep/fd_graph.go | 133 ++++++++++++------ planner/funcdep/only_full_group_by_test.go | 1 + 4 files changed, 252 insertions(+), 52 deletions(-) diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 44ae6760df5f3..1f5028f780078 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -15,7 +15,9 @@ package core import ( + "fmt" "math" + "strings" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" @@ -365,6 +367,10 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres outerFD.GroupByCols.Clear() } fds := outerFD + + if strings.HasPrefix(p.ctx.GetSessionVars().StmtCtx.OriginalSQL, "select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b") { + fmt.Println(1) + } fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt, innerAcrossBlock) p.fdSet = fds return fds @@ -1008,19 +1014,6 @@ func (p *LogicalSelection) ExtractFD() *fd.FDSet { // extract equivalence cols. equivUniqueIDs := extractEquivalenceCols(p.Conditions, p.SCtx(), fds) - // after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence - // cols pointing to outer equivalence cols. eg: t left join t1 on t.a = t1.b, leading a - // lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all - // left rows, once there is a null-refusing predicate on the inner side on upper layer, this - // can be equivalence again. (the outer rows left are all coming from equal matching) - // - // why not just makeNotNull of them, because even a non-equiv-related inner col can also - // refuse supplied null values. - if fds.Rule333Equiv.InnerCols.Len() != 0 && notnullColsUniqueIDs.Intersects(fds.Rule333Equiv.InnerCols) { - // restore/re-strength FDs from rule 333 - fds.MakeRestoreRule333() - } - // apply operator's characteristic's FD setting. fds.MakeNotNull(notnullColsUniqueIDs) fds.AddConstants(constUniqueIDs) diff --git a/planner/funcdep/doc.go b/planner/funcdep/doc.go index a70399c7a1b1b..9c97ac781888c 100644 --- a/planner/funcdep/doc.go +++ b/planner/funcdep/doc.go @@ -10,3 +10,154 @@ package funcdep // https://cs.uwaterloo.ca/research/tr/2000/11/CS-2000-11.thesis.pdf // TODO: Add the RFC design. + +// NOTE 1. +// when handling Lax FD, we don't care the null value in the dependency, which means +// as long as null-attribute coverage of the determinant can make a Lax FD as strict one. + +// The definition of "lax" used in the paper differs from the definition used by this +// library. For a lax dependency A~~>B, the paper allows this set of rows: +// +// a b +// ------- +// 1 1 +// 1 NULL +// +// This alternate definition is briefly covered in section 2.5.3.2 of the paper (see definition +// 2.19). The reason for this change is to allow a lax dependency to be upgraded to a strict +// dependency more readily, needing only the determinant columns to be not-null rather than +// both determinant and dependant columns. +// +// This is on the condition that, for definite values of determinant of a Lax FD, it won't +// have two same definite dependant value. That's true, because there is no way can derive +// to this kind of FD. +// +// Even in our implementation of outer join, the only way to produce duplicate definite +// determinant is the join predicate. But for now, we only maintain the equivalence and +// some strict FD of it. +// +// t(a,b) left join t1(c,d,e) on t.a = t1.c and b=1 +// a b | c d e +// ------+---------------- +// 1 1 | 1 NULL 1 +// 1 2 | NULL NULL NULL +// 2 1 | NULL NULL NULL +// +// Actually it's possible, the lax FD {a} -> {c} can be derived but not that useful. we only +// maintain the {c} ~> {a} for existence after outer join. Besides, there two Cond-FD should +// be preserved waiting for be visible again once with the null-reject on the condition of +// null constraint columns. (see below) +// +// NOTE 2. +// When handle outer join, it won't produce lax FD with duplicate definite determinant values and +// different dependency values. +// +// In implementation,we come across some lax FD dependent on null-reject of some other cols. For +// example. +// t(a,b) left join t1(c,d,e) on t.a = t1.c and b=1 +// a b | c d e +// ------+---------------- +// 1 1 | 1 NULL 1 +// 1 2 | NULL NULL NULL +// 2 1 | NULL NULL NULL +// +// here constant FD {} -> {b} won't be existed after the outer join is done. Notice null-constraint +// {c,d,e} -| {c,d,e}, this FD should be preserved and will be visible again when some null-reject +// predicate take effect on the null-constraint cols. +// +// It's same for strict equivalence {t.a} = {t1.c}. Notice there are no lax equivalence here, because +// left side couldn't be guaranteed to be definite or null. like a=2 here. Let's collect all of this +// on-condition FD down, correspondent with a null-constraints column set, name it as Cond-FD. +// +// lax equivalencies are theoretically possible, but it won't be constructed from an outer join unless +// t already has a constant FD in column `a` here before outer join take a run. So the lax equivalence +// has some pre-conditions as you see, and it couldn't cover the case shown above. Let us do it like a +// Cond-FD does. +// +// The FD constructed from the join predicate should be considered as Cond-FD. Here like equivalence of +// {a} == {c} and constant FD {b} = 1 (if the join condition is e=1, it's here too). We can say that for +// every matched row, this FDs is valid, while for the other rows, the inner side are supplied of null +// rows. So this FDs are stored as ncEdges with nc condition of all inner table cols. +// +// We introduced invisible FD with null-constraint column to solve the problem above named as Cond-FD. +// For multi embedded left join, we take the following case as an example. +// a,b c,d,e +// -----------+----------- +// 1 2 | 1 1 1 +// 2 2 | +// -----------+----------- +// +// left join on (a=c) res: +// a b c e e +// ------------------------- +// 1 2 1 1 1 +// 2 2 +- null null null -+ +// | | +// +-------------------+ +// \ +// \ +// the Cond-FD are < a=c with {c,d,e} > the latter is as null constraint cols +// +// e,f +// ----------------------- +// 1 2 +// 2 2 +// 3 3 +// ----------------------- +// +// left join on (e=a) res: +// e f a b c d e +// ----------------------------------- +// 1 2 1 2 1 1 1 +// 2 2 2 2 +- null null null --+---------------> Cond-FD are still exists. +// 3 3 +-null null | null null null |---+ +// | +-------------------+ | +// +-----------------------------------+-----------> New Cond-FD are occurs. +// +// +// the old Cond-FD with null constraint columns set {c,d,e} is preserved cause new append cols are all null too. +// the new Cond-FD with null constraint columns set {a,b,c,d,e} are also meaningful, even if the null-reject column +// is one of {c,d,e} which may reduce one of the matched row out of the result, the equivalence {a}={e} still exist. +// +// Provide that the result of the first left join is like: +// left join on (a=c) res: +// a b c e e +// --------------------------- +// 1 2 1 1 1 +// null 2 null null null +// +// THEN: left join on (e=a) res: +// e f a b c d e +// --------------------------------- +// 1 2 1 2 1 1 1 +// 2 2 null null null null null +// 3 3 3 3 null null null +// +// Even like that, the case of old Cond-FD and new Cond-FD are existed too. Seems the null-constraint column set of +// old Cond-FD {c,d,e} can be expanded as {a,b,c,d,e} visually, but we couldn't derive the inference of the join predicate +// (e=a). The null-reject of column `a` couldn't bring the visibility to the old Cond-FD theoretically, it just happened +// to refuse that row with a null value in column a. +// +// Think about adding one more row in first left join result. +// +// left join on (a=c) res: +// a b c e e +// --------------------------- +// 1 2 1 1 1 +// null 2 null null null +// 3 3 null null null +// +// THEN: left join on (e=a) res: +// e f a b c d e +// --------------------------------- +// 1 2 1 2 1 1 1 +// 2 2 null null null null null +// 3 3 3 3 null null null +// +// Conclusion: +// As you see that's right we couldn't derive the inference of the join predicate (e=a) to expand old Cond-FD's nc +// {c,d,e} as {a,b,c,d,e}. So the rule for Cond-FD is quite simple, just keep the old ncEdge from right, appending +// the new ncEdges in current left join. +// +// If the first left join result is in the outer side of the second left join, just keep the ncEdge from left as well, +// appending the new ncEdges in current left join. diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go index b0f91a37f55ed..51bdf95bb43a4 100644 --- a/planner/funcdep/fd_graph.go +++ b/planner/funcdep/fd_graph.go @@ -29,14 +29,48 @@ type fdEdge struct { // The value of the strict and eq bool forms the four kind of edges: // functional dependency, lax functional dependency, strict equivalence constraint, lax equivalence constraint. // And if there's a functional dependency `const` -> `column` exists. We would let the from side be empty. + // Adjustment: when strict is true and equiv is false, it means the edge is a Lax equivalence; when both true, + // it means the original Strict Equivalence. + // LAX EQ: (must have the exact same number of columns in each side if a lax EQ) + // {A} ~= {C} should be strengthened as {A} == {C} only with AC as definite. + // {A} ~= {C} & {C} ~= {D} unless C is definite, we won't get {A} ~= {D}. eg: {1} ~= {null} ~= {2} + // {AB} ~= {CD} won't derive to {A} ~= {C}, the opposite side is either, eg: {1,null} ~= {2,null} & {1,2} !(~=) {1,3} strict bool equiv bool + + // FD with non-nil conditionNC is hidden in FDSet, it will be visible again when at least one null-reject column in conditionNC. + // conditionNC should be satisfied before some FD make vision again, it's quite like lax FD to be strengthened as strict + // one. But the constraints should take effect on specified columns from conditionNC rather than just determinant columns. + conditionNC *FastIntSet +} + +// ncEdge is quite simple for remarking the null value relationship between cols, storing it as fdEdge will add complexity of traverse of a closure. +type ncEdge struct { + // null constraints = determinants -> dependencies + // determinants = from + // dependencies = to + // when the `from` side is null, the `to` side must be null as well. + // ------------------------------- + // e f a b c d e + // 1 2 1 2 1 1 1 + // 2 2 null 2 null null null + // 3 3 null null null null null + // {b} -| {a,c,d,e} + // {a,c,d,e} -| {a,c,d,e} + from FastIntSet + to FastIntSet } // FDSet is the main portal of functional dependency, it stores the relationship between (extended table / physical table)'s // columns. For more theory about this design, ref the head comments in the funcdep/doc.go. type FDSet struct { fdEdges []*fdEdge + // after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence + // cols pointing to outer equivalence cols. eg: t left join t1 on t.a = t1.b, leading a + // lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all + // left rows, once there is a null-refusing predicate on the inner side on upper layer, this + // can be equivalence again. (the outer rows left are all coming from equal matching) + ncEdges []*fdEdge // NotNullCols is used to record the columns with not-null attributes applied. // eg: {1} ~~> {2,3}, when {2,3} not null is applied, it actually does nothing. // but we should record {2,3} as not-null down for the convenience of transferring @@ -49,19 +83,7 @@ type FDSet struct { // GroupByCols is used to record columns / expressions that under the group by phrase. GroupByCols FastIntSet HasAggBuilt bool - // after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence - // cols pointing to outer equivalence cols. eg: t left join t1 on t.a = t1.b, leading a - // lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all - // left rows, once there is a null-refusing predicate on the inner side on upper layer, this - // can be equivalence again. (the outer rows left are all coming from equal matching) - // - // why not just makeNotNull of them, because even a non-equiv-related inner col can also - // refuse supplied null values. // todo: when multi join and across select block, this may need to be maintained more precisely. - Rule333Equiv struct { - Edges []*fdEdge - InnerCols FastIntSet - } } // ClosureOfStrict is exported for outer usage. @@ -215,6 +237,19 @@ func (s *FDSet) AddLaxFunctionalDependency(from, to FastIntSet) { s.addFunctionalDependency(from, to, false, false) } +func (s *FDSet) AddNCFunctionalDependency(from, to, nc FastIntSet, strict, equiv bool) { + // Since nc edge is invisible by now, just collecting them together simply, once the + // null-reject on nc cols is satisfied, let's pick them out and insert into the fdEdge + // normally. + s.ncEdges = append(s.ncEdges, &fdEdge{ + from: from, + to: to, + strict: strict, + equiv: equiv, + conditionNC: &nc, + }) +} + // addFunctionalDependency will add strict/lax functional dependency to the fdGraph. // eg: // CREATE TABLE t (a int key, b int, c int, d int, e int, UNIQUE (b,c)) @@ -287,6 +322,7 @@ func (s *FDSet) addFunctionalDependency(from, to FastIntSet, strict, equiv bool) // implies is used to shrink the edge size, keeping the minimum of the functional dependency set size. func (e *fdEdge) implies(otherEdge *fdEdge) bool { // The given one's from should be larger than the current one and the current one's to should be larger than the given one. + // ***************************** IMPLY IN SAME TYPE********************************************* // STRICT FD: // A --> C is stronger than AB --> C. --- YES // A --> BC is stronger than A --> C. --- YES @@ -294,6 +330,19 @@ func (e *fdEdge) implies(otherEdge *fdEdge) bool { // LAX FD: // 1: A ~~> C is stronger than AB ~~> C. --- YES // 2: A ~~> BC is stronger than A ~~> C. --- NO + // + // STRICT EQ: + // since {superset} == {superset} won't collapse with each other. --- NO + // + // LAX EQ: + // 1: {A} ~= {C} is stronger than {AB} ~= {CD}. --- NO + // 2: {AB} ~= {CD} is stronger than {A} ~= {C}. --- NO + // + // ***************************** IMPLY IN DIFF TYPE********************************************* + // 1: {A} == {B} is stronger than {A} ~= {B}, {A} -> {B}, {A} ~> {B} + // 2: {A} ~= {B} is stronger than {A} ~> {B} + // 3: {A} -> {B} is stronger than {A} ~> {B} + // // The precondition for 2 to be strict FD is much easier to satisfied than 1, only to // need {a,c} is not null. So we couldn't merge this two to be one lax FD. // but for strict/equiv FD implies lax FD, 1 & 2 is implied both reasonably. @@ -425,6 +474,7 @@ func (s *FDSet) AddConstants(cons FastIntSet) { shouldRemoved = true } } + // pre-condition NOTE 1 in doc.go, it won't occur duplicate definite determinant of Lax FD. // for strict or lax FDs, both can reduce the dependencies side columns with constant closure. if fd.removeColumnsToSide(cols) { shouldRemoved = true @@ -507,6 +557,29 @@ func (s *FDSet) EquivalenceCols() (eqs []*FastIntSet) { func (s *FDSet) MakeNotNull(notNullCols FastIntSet) { notNullCols.UnionWith(s.NotNullCols) notNullColsSet := s.closureOfEquivalence(notNullCols) + // make nc FD visible. + for i := 0; i < len(s.ncEdges); i++ { + fd := s.ncEdges[i] + if fd.conditionNC.Intersects(notNullColsSet) { + // condition satisfied. + s.ncEdges = append(s.ncEdges[:i], s.ncEdges[i+1:]...) + i-- + if fd.isConstant() { + s.AddConstants(fd.to) + } else if fd.equiv { + s.AddEquivalence(fd.from, fd.to) + newNotNullColsSet := s.closureOfEquivalence(notNullColsSet) + if !newNotNullColsSet.Difference(notNullColsSet).IsEmpty() { + notNullColsSet = newNotNullColsSet + // expand not-null set. + i = -1 + } + } else { + s.addFunctionalDependency(fd.from, fd.to, fd.strict, fd.equiv) + } + } + } + // make origin FD strengthened. for i := 0; i < len(s.fdEdges); i++ { fd := s.fdEdges[i] if fd.strict { @@ -711,18 +784,15 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F s.addFunctionalDependency(edge.from, edge.to, false, edge.equiv) } } + for _, edge := range innerFDs.ncEdges { + s.ncEdges = append(s.ncEdges, edge) + } leftCombinedFDFrom := NewFastIntSet() leftCombinedFDTo := NewFastIntSet() for _, edge := range filterFDs.fdEdges { // Rule #3.2, constant FD are removed from right side of left join. if edge.isConstant() { - s.Rule333Equiv.Edges = append(s.Rule333Equiv.Edges, &fdEdge{ - from: edge.from, - to: edge.to, - strict: edge.strict, - equiv: edge.equiv, - }) - s.Rule333Equiv.InnerCols = innerCols + s.AddNCFunctionalDependency(edge.from, edge.to, innerCols, edge.strict, edge.equiv) continue } // Rule #3.3, we only keep the lax FD from right side pointing the left side. @@ -764,13 +834,7 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F s.addFunctionalDependency(NewFastIntSet(i), NewFastIntSet(j), false, false) } } - s.Rule333Equiv.Edges = append(s.Rule333Equiv.Edges, &fdEdge{ - from: laxFDFrom, - to: laxFDTo, - strict: true, - equiv: true, - }) - s.Rule333Equiv.InnerCols = innerCols + s.AddNCFunctionalDependency(equivColsLeft, equivColsRight, innerCols, true, true) } // Rule #3.1, filters won't produce any strict/lax FDs. } @@ -825,18 +889,6 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F } } -func (s *FDSet) MakeRestoreRule333() { - for _, eg := range s.Rule333Equiv.Edges { - if eg.isConstant() { - s.AddConstants(eg.to) - } else { - s.AddEquivalence(eg.from, eg.to) - } - } - s.Rule333Equiv.Edges = nil - s.Rule333Equiv.InnerCols.Clear() -} - type ArgOpts struct { SkipFDRule331 bool TypeFDRule331 TypeFilterFD331 @@ -900,6 +952,10 @@ func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) { s.AddLaxFunctionalDependency(fd.from, fd.to) } } + for i := range fds.ncEdges { + fd := fds.ncEdges[i] + s.ncEdges = append(s.ncEdges, fd) + } s.NotNullCols.UnionWith(fds.NotNullCols) if s.HashCodeToUniqueID == nil { s.HashCodeToUniqueID = fds.HashCodeToUniqueID @@ -917,7 +973,6 @@ func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) { } s.HasAggBuilt = fds.HasAggBuilt } - s.Rule333Equiv = fds.Rule333Equiv } // MaxOneRow will regard every column in the fdSet as a constant. Since constant is stronger that strict FD, it will diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go index 1edd739fa9c2b..65367fe36d76f 100644 --- a/planner/funcdep/only_full_group_by_test.go +++ b/planner/funcdep/only_full_group_by_test.go @@ -153,6 +153,7 @@ func TestOnlyFullGroupByOldCases(t *testing.T) { // classic cases tk.MustQuery("select customer1.a, count(*) from customer1 left join customer2 on customer1.a=customer2.b where customer2.pk in (7,9) group by customer2.b;") tk.MustQuery("select customer1.a, count(*) from customer1 left join customer2 on customer1.a=1 where customer2.pk in (7,9) group by customer2.b;") + tk.MustQuery("select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;") tk.MustExec("drop view if exists customer") // this left join can extend left pk to all cols. tk.MustExec("CREATE algorithm=merge definer='root'@'localhost' VIEW customer as SELECT pk,a,b FROM customer1 LEFT JOIN customer2 USING (pk);") From a03a85397a36704816c710b3be95205820894baf Mon Sep 17 00:00:00 2001 From: AilinKid <314806019@qq.com> Date: Wed, 20 Apr 2022 17:51:56 +0800 Subject: [PATCH 12/13] fix the refine test Signed-off-by: AilinKid <314806019@qq.com> --- planner/core/logical_plans.go | 8 +------- planner/funcdep/fd_graph.go | 9 ++++++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 1f5028f780078..fe3ec4beeb8d8 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -15,10 +15,6 @@ package core import ( - "fmt" - "math" - "strings" - "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" "github.com/pingcap/tidb/infoschema" @@ -36,6 +32,7 @@ import ( "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/ranger" "go.uber.org/zap" + "math" ) var ( @@ -368,9 +365,6 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres } fds := outerFD - if strings.HasPrefix(p.ctx.GetSessionVars().StmtCtx.OriginalSQL, "select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b") { - fmt.Println(1) - } fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt, innerAcrossBlock) p.fdSet = fds return fds diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go index 51bdf95bb43a4..4ac9c8d484512 100644 --- a/planner/funcdep/fd_graph.go +++ b/planner/funcdep/fd_graph.go @@ -1107,9 +1107,12 @@ func (s *FDSet) ProjectCols(cols FastIntSet) { continue } } - if fd.removeColumnsToSide(fd.from) { - // fd.to side is empty, remove this FD. - continue + // from and to side of equiv are same, don't do trivial elimination. + if !fd.isEquivalence() { + if fd.removeColumnsToSide(fd.from) { + // fd.to side is empty, remove this FD. + continue + } } } From b3b600e3a3f17ee168069c68fcadd557ff2f57dd Mon Sep 17 00:00:00 2001 From: Arenatlx Date: Mon, 25 Apr 2022 15:25:01 +0800 Subject: [PATCH 13/13] Update planner/core/logical_plan_builder.go Co-authored-by: Yiding Cui --- planner/core/logical_plan_builder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 9321309bdf523..3591973079a8e 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1942,7 +1942,7 @@ func (b *PlanBuilder) buildSortCheck(ctx context.Context, p LogicalPlan, sel *as if sel.Distinct { // order-by with distinct case // Rule #1: order by item should be in the select filed list - // Rule #2: the base col that order by item dependent on should be in the select field list + // Rule #2: the base col that order by item is dependent on should be in the select field list for offset, odrItem := range orderByItemExprs { item := fd.NewFastIntSet() switch x := odrItem.Expr.(type) {