From 2baed3e4a48a3cf0bd16db16dd698d6bab1d1d77 Mon Sep 17 00:00:00 2001
From: AilinKid <314806019@qq.com>
Date: Fri, 18 Mar 2022 00:02:30 +0800
Subject: [PATCH 01/13] fix order by fd check

Signed-off-by: AilinKid <314806019@qq.com>
---
 planner/core/logical_plan_builder.go | 190 +++++++++++++++++++++++++--
 planner/funcdep/extract_fd_test.go   |   3 -
 2 files changed, 179 insertions(+), 14 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index e413fa308f6d4..4ff30dc918e58 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1235,8 +1235,9 @@ func findColFromNaturalUsingJoin(p LogicalPlan, col *expression.Column) (name *t
 }
 
 // buildProjection returns a Projection plan and non-aux columns length.
-func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields []*ast.SelectField, mapper map[*ast.AggregateFuncExpr]int,
-	windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []expression.Expression, int, error) {
+func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, mapper map[*ast.AggregateFuncExpr]int,
+	windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) {
+	fields := sel.Fields.Fields
 	err := b.preprocessUserVarTypes(ctx, p, fields, mapper)
 	if err != nil {
 		return nil, nil, 0, err
@@ -1321,6 +1322,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields
 	}
 	proj.SetChildren(p)
 	// delay the only-full-group-by-check in create view statement to later query.
+	var orderByItemExprs []*util.ByItems
 	if !b.isCreateView {
 		fds := proj.ExtractFD()
 		// Projection -> Children -> ...
@@ -1421,8 +1423,85 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields
 			// for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag.
 			fds.HasAggBuilt = false
 		}
+		if sel.OrderBy != nil && sel.GroupBy != nil {
+			// check only-full-group-by 4 order-by clause with group-by clause
+			orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
+			if err != nil {
+				return nil, nil, 0, err
+			}
+			selectExprsUniqueIDs := fd.NewFastIntSet()
+			for _, expr := range proj.Exprs[:oldLen] {
+				switch x := expr.(type) {
+				case *expression.Column:
+					selectExprsUniqueIDs.Insert(int(x.UniqueID))
+				case *expression.ScalarFunction:
+					scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+					if !ok {
+						panic("selected expr must have been registered, shouldn't be here")
+					}
+					selectExprsUniqueIDs.Insert(scalarUniqueID)
+				default:
+				}
+			}
+			for offset, odrItem := range orderByItemExprs {
+				item := fd.NewFastIntSet()
+				switch x := odrItem.Expr.(type) {
+				case *expression.Column:
+					item.Insert(int(x.UniqueID))
+				case *expression.ScalarFunction:
+					scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+					if !ok {
+						panic("selected expr must have been registered, shouldn't be here")
+					}
+					item.Insert(scalarUniqueID)
+				default:
+				}
+				// Step#1: whether order by item is in the origin select field list
+				if item.SubsetOf(selectExprsUniqueIDs) {
+					continue
+				}
+				// Step#2: whether order by item is in the group by list
+				if item.SubsetOf(fds.GroupByCols) {
+					continue
+				}
+				// Step#3: whether order by item is in the FD closure of group-by & select list items.
+				if item.SubsetOf(fds.ConstantCols()) {
+					continue
+				}
+				strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols)
+				if item.SubsetOf(strictClosureOfGroupByCols) {
+					continue
+				}
+				strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs)
+				if item.SubsetOf(strictClosureOfSelectCols) {
+					continue
+				}
+				// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
+				baseCols := expression.ExtractColumns(odrItem.Expr)
+				errShowCol := baseCols[0]
+				for _, col := range baseCols {
+					colSet := fd.NewFastIntSet(int(col.UniqueID))
+					if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) {
+						errShowCol = col
+						break
+					}
+				}
+				// better use the schema alias name firstly if any.
+				name := ""
+				for idx, schemaCol := range proj.Schema().Columns {
+					if schemaCol.UniqueID == errShowCol.UniqueID {
+						name = proj.names[idx].String()
+						break
+					}
+				}
+				if name == "" {
+					name = errShowCol.OrigName
+				}
+				return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name)
+			}
+		}
 	}
-	return proj, proj.Exprs, oldLen, nil
+	return proj, orderByItemExprs, oldLen, nil
 }
 
 func (b *PlanBuilder) buildDistinct(child LogicalPlan, length int) (*LogicalAggregation, error) {
@@ -1821,6 +1900,32 @@ func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*a
 	return b.buildSortWithCheck(ctx, p, byItems, aggMapper, windowMapper, nil, 0, false)
 }
 
+func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int,
+	windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) ([]*util.ByItems, error) {
+	exprs := make([]*util.ByItems, 0, len(byItems))
+	transformer := &itemTransformer{}
+	for i, item := range byItems {
+		newExpr, _ := item.Expr.Accept(transformer)
+		item.Expr = newExpr.(ast.ExprNode)
+		it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil)
+		if err != nil {
+			return nil, err
+		}
+
+		// check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442
+		if hasDistinct && projExprs != nil {
+			err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen)
+			if err != nil {
+				return nil, err
+			}
+		}
+
+		p = np
+		exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc})
+	}
+	return exprs, nil
+}
+
 func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int,
 	projExprs []expression.Expression, oldLen int, hasDistinct bool) (*LogicalSort, error) {
 	if _, isUnion := p.(*LogicalUnionAll); isUnion {
@@ -1855,6 +1960,18 @@ func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byI
 	return sort, nil
 }
 
+func (b *PlanBuilder) buildSortWithCheckV2(p LogicalPlan, byItems []*util.ByItems) (*LogicalSort, error) {
+	if _, isUnion := p.(*LogicalUnionAll); isUnion {
+		b.curClause = globalOrderByClause
+	} else {
+		b.curClause = orderByClause
+	}
+	sort := LogicalSort{}.Init(b.ctx, b.getSelectOffset())
+	sort.ByItems = byItems
+	sort.SetChildren(p)
+	return sort, nil
+}
+
 // checkOrderByInDistinct checks whether ORDER BY has conflicts with DISTINCT, see #12442
 func (b *PlanBuilder) checkOrderByInDistinct(byItem *ast.ByItem, idx int, expr expression.Expression, p LogicalPlan, originalExprs []expression.Expression, length int) error {
 	// Check if expressions in ORDER BY whole match some fields in DISTINCT.
@@ -3152,6 +3269,51 @@ func (b *PlanBuilder) checkOnlyFullGroupByWithGroupClause(p LogicalPlan, sel *as
 	return nil
 }
 
+func (b *PlanBuilder) checkOrderByPart1(sel *ast.SelectStmt) error {
+	if sel.GroupBy != nil {
+		// check only-full-group-by 4 order-by clause with group-by clause
+		// Step#1: whether order by item is in the select field list
+		// Step#2: whether order by item is in the group by list
+		// Step#3: whether order by item is in the FD closure of group-by & select list items.
+		// Since FD hasn't been built yet for now, let's delay this logic to checkOrderByPart2.
+	} else {
+		// check only-full-group-by 4 order-by clause without group-by clause
+		// Step1: judge whether this query is an aggregated query.
+		// Step2: there is a agg in order by while sql is not an aggregated query, errors.
+		// Since here doesn't require FD to check, this can take place as soon as possible.
+		// Step1:
+		isNonAggregatedQuery := false
+		resolver := colResolverForOnlyFullGroupBy{
+			firstOrderByAggColIdx: -1,
+		}
+		resolver.curClause = fieldList
+		for idx, field := range sel.Fields.Fields {
+			resolver.exprIdx = idx
+			field.Accept(&resolver)
+		}
+		if sel.Having != nil {
+			sel.Having.Expr.Accept(&resolver)
+		}
+		if !resolver.hasAggFunc {
+			isNonAggregatedQuery = true
+		}
+
+		// Step2:
+		if sel.OrderBy != nil {
+			resolver.curClause = orderByClause
+			for idx, byItem := range sel.OrderBy.Items {
+				resolver.exprIdx = idx
+				byItem.Expr.Accept(&resolver)
+			}
+		}
+		if isNonAggregatedQuery && resolver.firstOrderByAggColIdx != -1 {
+			// SQL like `select a from t where a = 1 order by count(b)` is illegal.
+			return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(resolver.firstOrderByAggColIdx + 1)
+		}
+	}
+	return nil
+}
+
 func (b *PlanBuilder) checkOnlyFullGroupByWithOutGroupClause(p LogicalPlan, sel *ast.SelectStmt) error {
 	resolver := colResolverForOnlyFullGroupBy{
 		firstOrderByAggColIdx: -1,
@@ -3177,7 +3339,7 @@ func (b *PlanBuilder) checkOnlyFullGroupByWithOutGroupClause(p LogicalPlan, sel
 		// SQL like `select a from t where a = 1 order by count(b)` is illegal.
 		return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(resolver.firstOrderByAggColIdx + 1)
 	}
-	if !resolver.hasAggFuncOrAnyValue || len(resolver.nonAggCols) == 0 {
+	if !(resolver.hasAggFunc || resolver.hasAnyValue) || len(resolver.nonAggCols) == 0 {
 		return nil
 	}
 	singleValueColNames := make(map[*types.FieldName]struct{}, len(sel.Fields.Fields))
@@ -3223,7 +3385,8 @@ type colResolverForOnlyFullGroupBy struct {
 	nonAggCols            []*ast.ColumnName
 	exprIdx               int
 	nonAggColIdxs         []int
-	hasAggFuncOrAnyValue  bool
+	hasAggFunc            bool
+	hasAnyValue           bool
 	firstOrderByAggColIdx int
 	curClause             clauseCode
 }
@@ -3231,7 +3394,7 @@ type colResolverForOnlyFullGroupBy struct {
 func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) {
 	switch t := node.(type) {
 	case *ast.AggregateFuncExpr:
-		c.hasAggFuncOrAnyValue = true
+		c.hasAggFunc = true
 		if c.curClause == orderByClause {
 			c.firstOrderByAggColIdx = c.exprIdx
 		}
@@ -3239,7 +3402,7 @@ func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) {
 	case *ast.FuncCallExpr:
 		// enable function `any_value` in aggregation even `ONLY_FULL_GROUP_BY` is set
 		if t.FnName.L == ast.AnyValue {
-			c.hasAggFuncOrAnyValue = true
+			c.hasAnyValue = true
 			return node, true
 		}
 	case *ast.ColumnNameExpr:
@@ -3721,7 +3884,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		windowAggMap                  map[*ast.AggregateFuncExpr]int
 		correlatedAggMap              map[*ast.AggregateFuncExpr]int
 		gbyCols                       []expression.Expression
-		projExprs                     []expression.Expression
+		orderByItems                  []*util.ByItems
 	)
 
 	// set for update read to true before building result set node
@@ -3762,6 +3925,11 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		originalFields = sel.Fields.Fields
 	}
 
+	// basic order-by checking, nothing to do with only-full-group-by mode
+	if err := b.checkOrderByPart1(sel); err != nil {
+		return nil, err
+	}
+
 	if sel.GroupBy != nil {
 		p, gbyCols, err = b.resolveGbyExprs(ctx, p, sel.GroupBy, sel.Fields.Fields)
 		if err != nil {
@@ -3865,7 +4033,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 	var oldLen int
 	// According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html,
 	// we can only process window functions after having clause, so `considerWindow` is false now.
-	p, projExprs, oldLen, err = b.buildProjection(ctx, p, sel.Fields.Fields, totalMap, nil, false, sel.OrderBy != nil)
+	p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, false, sel.OrderBy != nil)
 	if err != nil {
 		return nil, err
 	}
@@ -3903,7 +4071,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		// In such case plan `p` is not changed, so we don't have to build another projection.
 		if hasWindowFuncField {
 			// Now we build the window function fields.
-			p, projExprs, oldLen, err = b.buildProjection(ctx, p, sel.Fields.Fields, windowAggMap, windowMapper, true, false)
+			p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, true, false)
 			if err != nil {
 				return nil, err
 			}
@@ -3919,7 +4087,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 
 	if sel.OrderBy != nil {
 		if b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy() {
-			p, err = b.buildSortWithCheck(ctx, p, sel.OrderBy.Items, orderMap, windowMapper, projExprs, oldLen, sel.Distinct)
+			p, err = b.buildSortWithCheckV2(p, orderByItems)
 		} else {
 			p, err = b.buildSort(ctx, p, sel.OrderBy.Items, orderMap, windowMapper)
 		}
diff --git a/planner/funcdep/extract_fd_test.go b/planner/funcdep/extract_fd_test.go
index 84a1533e161eb..99e4dd593698a 100644
--- a/planner/funcdep/extract_fd_test.go
+++ b/planner/funcdep/extract_fd_test.go
@@ -349,9 +349,6 @@ func TestFDSet_MakeOuterJoin(t *testing.T) {
 	ctx := context.TODO()
 	is := testGetIS(ass, tk.Session())
 	for i, tt := range tests {
-		if i == 0 {
-			fmt.Println(1)
-		}
 		comment := fmt.Sprintf("case:%v sql:%s", i, tt.sql)
 		stmt, err := par.ParseOneStmt(tt.sql, "", "")
 		ass.Nil(err, comment)

From b7fc20da4fd852bf1767193d73a941e94ed19cbf Mon Sep 17 00:00:00 2001
From: ailinkid <314806019@qq.com>
Date: Fri, 18 Mar 2022 16:11:48 +0800
Subject: [PATCH 02/13] make test pass

Signed-off-by: ailinkid <314806019@qq.com>
---
 planner/core/logical_plan_builder.go       | 232 ++++++++++++++-------
 planner/funcdep/only_full_group_by_test.go |   6 +-
 2 files changed, 164 insertions(+), 74 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index 4ff30dc918e58..71bf0ff705d7f 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1423,12 +1423,10 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 			// for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag.
 			fds.HasAggBuilt = false
 		}
-		if sel.OrderBy != nil && sel.GroupBy != nil {
-			// check only-full-group-by 4 order-by clause with group-by clause
-			orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
-			if err != nil {
-				return nil, nil, 0, err
-			}
+		if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1") {
+			fmt.Println(1)
+		}
+		if sel.OrderBy != nil {
 			selectExprsUniqueIDs := fd.NewFastIntSet()
 			for _, expr := range proj.Exprs[:oldLen] {
 				switch x := expr.(type) {
@@ -1443,61 +1441,126 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 				default:
 				}
 			}
-			for offset, odrItem := range orderByItemExprs {
-				item := fd.NewFastIntSet()
-				switch x := odrItem.Expr.(type) {
-				case *expression.Column:
-					item.Insert(int(x.UniqueID))
-				case *expression.ScalarFunction:
-					scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
-					if !ok {
-						panic("selected expr must have been registered, shouldn't be here")
+			// check only-full-group-by 4 order-by clause with group-by clause
+			orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
+			if err != nil {
+				return nil, nil, 0, err
+			}
+			if sel.GroupBy != nil {
+				for offset, odrItem := range orderByItemExprs {
+					item := fd.NewFastIntSet()
+					switch x := odrItem.Expr.(type) {
+					case *expression.Column:
+						item.Insert(int(x.UniqueID))
+					case *expression.ScalarFunction:
+						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+						if !ok {
+							panic("selected expr must have been registered, shouldn't be here")
+						}
+						item.Insert(scalarUniqueID)
+					default:
 					}
-					item.Insert(scalarUniqueID)
-				default:
-				}
-				// Step#1: whether order by item is in the origin select field list
-				if item.SubsetOf(selectExprsUniqueIDs) {
-					continue
-				}
-				// Step#2: whether order by item is in the group by list
-				if item.SubsetOf(fds.GroupByCols) {
-					continue
-				}
-				// Step#3: whether order by item is in the FD closure of group-by & select list items.
-				if item.SubsetOf(fds.ConstantCols()) {
-					continue
-				}
-				strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols)
-				if item.SubsetOf(strictClosureOfGroupByCols) {
-					continue
-				}
-				strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs)
-				if item.SubsetOf(strictClosureOfSelectCols) {
-					continue
-				}
-				// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
-				baseCols := expression.ExtractColumns(odrItem.Expr)
-				errShowCol := baseCols[0]
-				for _, col := range baseCols {
-					colSet := fd.NewFastIntSet(int(col.UniqueID))
-					if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) {
-						errShowCol = col
-						break
+					// Step#1: whether order by item is in the origin select field list
+					if item.SubsetOf(selectExprsUniqueIDs) {
+						continue
 					}
-				}
-				// better use the schema alias name firstly if any.
-				name := ""
-				for idx, schemaCol := range proj.Schema().Columns {
-					if schemaCol.UniqueID == errShowCol.UniqueID {
-						name = proj.names[idx].String()
-						break
+					// Step#2: whether order by item is in the group by list
+					if item.SubsetOf(fds.GroupByCols) {
+						continue
+					}
+					// Step#3: whether order by item is in the FD closure of group-by & select list items.
+					if item.SubsetOf(fds.ConstantCols()) {
+						continue
+					}
+					strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols)
+					if item.SubsetOf(strictClosureOfGroupByCols) {
+						continue
+					}
+					strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs)
+					if item.SubsetOf(strictClosureOfSelectCols) {
+						continue
+					}
+					// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
+					baseCols := expression.ExtractColumns(odrItem.Expr)
+					errShowCol := baseCols[0]
+					for _, col := range baseCols {
+						colSet := fd.NewFastIntSet(int(col.UniqueID))
+						if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) {
+							errShowCol = col
+							break
+						}
+					}
+					// better use the schema alias name firstly if any.
+					name := ""
+					for idx, schemaCol := range proj.Schema().Columns {
+						if schemaCol.UniqueID == errShowCol.UniqueID {
+							name = proj.names[idx].String()
+							break
+						}
 					}
+					if name == "" {
+						name = errShowCol.OrigName
+					}
+					return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name)
 				}
-				if name == "" {
-					name = errShowCol.OrigName
+			}
+			if sel.Distinct {
+				// order-by with distinct case
+				// Rule #1: order by item should be in the select filed list
+				// Rule #2: the base col that order by item dependent on should be in the select field list
+				for offset, odrItem := range orderByItemExprs {
+					item := fd.NewFastIntSet()
+					switch x := odrItem.Expr.(type) {
+					case *expression.Column:
+						item.Insert(int(x.UniqueID))
+					case *expression.ScalarFunction:
+						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+						if !ok {
+							panic("selected expr must have been registered, shouldn't be here")
+						}
+						item.Insert(scalarUniqueID)
+					default:
+					}
+					// Rule #1
+					if item.SubsetOf(selectExprsUniqueIDs) {
+						continue
+					}
+					// Rule #2
+					baseCols := expression.ExtractColumns(odrItem.Expr)
+					colSet := fd.NewFastIntSet()
+					for _, col := range baseCols {
+						colSet.Insert(int(col.UniqueID))
+					}
+					if colSet.SubsetOf(selectExprsUniqueIDs) {
+						continue
+					}
+					// find that error base col
+					errShowCol := baseCols[0]
+					for _, col := range baseCols {
+						colSet := fd.NewFastIntSet()
+						colSet.Insert(int(col.UniqueID))
+						if !colSet.SubsetOf(selectExprsUniqueIDs) {
+							errShowCol = col
+							break
+						}
+					}
+					// better use the schema alias name firstly if any.
+					name := ""
+					for idx, schemaCol := range proj.Schema().Columns {
+						if schemaCol.UniqueID == errShowCol.UniqueID {
+							name = proj.names[idx].String()
+							break
+						}
+					}
+					if name == "" {
+						name = errShowCol.OrigName
+					}
+					if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok {
+						return nil, nil, 0, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT")
+					}
+					// select distinct count(a) from t group by b order by sum(a);          ✗
+					return nil, nil, 0, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT")
 				}
-				return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name)
 			}
 		}
 	}
@@ -1904,7 +1967,7 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt
 	windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) ([]*util.ByItems, error) {
 	exprs := make([]*util.ByItems, 0, len(byItems))
 	transformer := &itemTransformer{}
-	for i, item := range byItems {
+	for _, item := range byItems {
 		newExpr, _ := item.Expr.Accept(transformer)
 		item.Expr = newExpr.(ast.ExprNode)
 		it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil)
@@ -1913,12 +1976,12 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt
 		}
 
 		// check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442
-		if hasDistinct && projExprs != nil {
-			err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen)
-			if err != nil {
-				return nil, err
-			}
-		}
+		//if hasDistinct && projExprs != nil {
+		//	err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen)
+		//	if err != nil {
+		//		return nil, err
+		//	}
+		//}
 
 		p = np
 		exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc})
@@ -2140,6 +2203,10 @@ func matchField(f *ast.SelectField, col *ast.ColumnNameExpr, ignoreAsName bool)
 	return false
 }
 
+func resolveFromOuterSchema(v *ast.ColumnNameExpr) bool {
+	return false
+}
+
 func resolveFromSelectFields(v *ast.ColumnNameExpr, fields []*ast.SelectField, ignoreAsName bool) (index int, err error) {
 	var matchedExpr ast.ExprNode
 	index = -1
@@ -3269,7 +3336,7 @@ func (b *PlanBuilder) checkOnlyFullGroupByWithGroupClause(p LogicalPlan, sel *as
 	return nil
 }
 
-func (b *PlanBuilder) checkOrderByPart1(sel *ast.SelectStmt) error {
+func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt, p LogicalPlan) error {
 	if sel.GroupBy != nil {
 		// check only-full-group-by 4 order-by clause with group-by clause
 		// Step#1: whether order by item is in the select field list
@@ -3299,16 +3366,38 @@ func (b *PlanBuilder) checkOrderByPart1(sel *ast.SelectStmt) error {
 		}
 
 		// Step2:
-		if sel.OrderBy != nil {
+		if sel.OrderBy != nil && isNonAggregatedQuery {
+			// YES: SQL like `select a from t where a = 1 order by count(b)` is illegal.
+			// NO: SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b)) is ok.
+			// judge whether the aggregated func in order by item is referred to outer schema.
 			resolver.curClause = orderByClause
+			resolver.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items))
 			for idx, byItem := range sel.OrderBy.Items {
 				resolver.exprIdx = idx
 				byItem.Expr.Accept(&resolver)
 			}
-		}
-		if isNonAggregatedQuery && resolver.firstOrderByAggColIdx != -1 {
-			// SQL like `select a from t where a = 1 order by count(b)` is illegal.
-			return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(resolver.firstOrderByAggColIdx + 1)
+			// has no order-by agg.
+			if len(resolver.orderByAggMap) == 0 {
+				return nil
+			}
+			// has some order-by agg.
+			for k, v := range resolver.orderByAggMap {
+				var cols []*expression.Column
+				for _, arg := range k.Args {
+					newArg, np, err := b.rewrite(ctx, arg, p, nil, true)
+					if err != nil {
+						return err
+					}
+					p = np
+					cols = append(cols, expression.ExtractColumns(newArg)...)
+				}
+				// use outer logical query block correlated cols --- valid
+				if len(cols) == 0 {
+					continue
+				}
+				// use logical query block cols --- invalid
+				return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1)
+			}
 		}
 	}
 	return nil
@@ -3389,6 +3478,7 @@ type colResolverForOnlyFullGroupBy struct {
 	hasAnyValue           bool
 	firstOrderByAggColIdx int
 	curClause             clauseCode
+	orderByAggMap         map[*ast.AggregateFuncExpr]int
 }
 
 func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) {
@@ -3396,7 +3486,7 @@ func (c *colResolverForOnlyFullGroupBy) Enter(node ast.Node) (ast.Node, bool) {
 	case *ast.AggregateFuncExpr:
 		c.hasAggFunc = true
 		if c.curClause == orderByClause {
-			c.firstOrderByAggColIdx = c.exprIdx
+			c.orderByAggMap[t] = c.exprIdx
 		}
 		return node, true
 	case *ast.FuncCallExpr:
@@ -3926,7 +4016,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 	}
 
 	// basic order-by checking, nothing to do with only-full-group-by mode
-	if err := b.checkOrderByPart1(sel); err != nil {
+	if err := b.checkOrderByPart1(ctx, sel, p); err != nil {
 		return nil, err
 	}
 
diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go
index cbb4c9eb6a50f..b519436beabff 100644
--- a/planner/funcdep/only_full_group_by_test.go
+++ b/planner/funcdep/only_full_group_by_test.go
@@ -72,13 +72,13 @@ func TestOnlyFullGroupByOldCases(t *testing.T) {
 	require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t1.c' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by")
 	_, err = tk.Exec("SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1;")
 	require.NotNil(t, err)
-	require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT")
+	require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t1.d' which is not in SELECT list; this is incompatible with DISTINCT")
 	_, err = tk.Exec("SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1;")
 	require.NotNil(t, err)
-	require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT")
+	require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t1.d' which is not in SELECT list; this is incompatible with DISTINCT")
 	_, err = tk.Exec("SELECT (SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1) FROM t as t2;")
 	require.NotNil(t, err)
-	require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t.d' which is not in SELECT list; this is incompatible with DISTINCT")
+	require.Equal(t, err.Error(), "[planner:3065]Expression #1 of ORDER BY clause is not in SELECT list, references column 'test.t1.d' which is not in SELECT list; this is incompatible with DISTINCT")
 
 	// test case 7
 	tk.MustExec("drop table if exists t")

From 2e1c49f0b13b3c9eee84faae8b5e0a4dc2eea1f0 Mon Sep 17 00:00:00 2001
From: ailinkid <314806019@qq.com>
Date: Sat, 19 Mar 2022 22:55:55 +0800
Subject: [PATCH 03/13] fix orm django agg test case

Signed-off-by: ailinkid <314806019@qq.com>
---
 planner/core/logical_plan_builder.go | 40 +++++++++++++++++-----
 planner/core/logical_plans.go        | 51 ++++++++++++++++++++++++----
 planner/core/plan.go                 |  7 +++-
 planner/funcdep/fd_graph.go          | 25 ++++++++------
 4 files changed, 96 insertions(+), 27 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index 71bf0ff705d7f..c56547df032c8 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1236,7 +1236,7 @@ func findColFromNaturalUsingJoin(p LogicalPlan, col *expression.Column) (name *t
 
 // buildProjection returns a Projection plan and non-aux columns length.
 func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, mapper map[*ast.AggregateFuncExpr]int,
-	windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) {
+	windowMapper map[*ast.WindowFuncExpr]int, orderMapper map[*ast.AggregateFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) {
 	fields := sel.Fields.Fields
 	err := b.preprocessUserVarTypes(ctx, p, fields, mapper)
 	if err != nil {
@@ -1423,9 +1423,6 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 			// for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag.
 			fds.HasAggBuilt = false
 		}
-		if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT DISTINCT t1.a FROM t as t1 ORDER BY t1.d LIMIT 1") {
-			fmt.Println(1)
-		}
 		if sel.OrderBy != nil {
 			selectExprsUniqueIDs := fd.NewFastIntSet()
 			for _, expr := range proj.Exprs[:oldLen] {
@@ -1442,7 +1439,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 				}
 			}
 			// check only-full-group-by 4 order-by clause with group-by clause
-			orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, mapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
+			orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
 			if err != nil {
 				return nil, nil, 0, err
 			}
@@ -1453,11 +1450,15 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 					case *expression.Column:
 						item.Insert(int(x.UniqueID))
 					case *expression.ScalarFunction:
+						// order by item may not be projected as a column in projection, allocated a new one
 						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
 						if !ok {
-							panic("selected expr must have been registered, shouldn't be here")
+							// panic("selected expr must have been registered, shouldn't be here")
+							scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID())
 						}
 						item.Insert(scalarUniqueID)
+					case *expression.Constant:
+						// order by null/false/true can always be ok, let item be empty.
 					default:
 					}
 					// Step#1: whether order by item is in the origin select field list
@@ -1482,6 +1483,21 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 					}
 					// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
 					baseCols := expression.ExtractColumns(odrItem.Expr)
+					if len(baseCols) == 0 {
+						// order by item has no reference of base col, like order by abs(1) and rand()
+						continue
+					}
+					// GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely.
+					baseColsUniqueIDs := fd.NewFastIntSet()
+					for _, bc := range baseCols {
+						baseColsUniqueIDs.Insert(int(bc.UniqueID))
+					}
+					if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) {
+						continue
+					}
+					if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) {
+						continue
+					}
 					errShowCol := baseCols[0]
 					for _, col := range baseCols {
 						colSet := fd.NewFastIntSet(int(col.UniqueID))
@@ -1514,9 +1530,11 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 					case *expression.Column:
 						item.Insert(int(x.UniqueID))
 					case *expression.ScalarFunction:
+						// order by item may not be projected as a column in projection, allocated a new one
 						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
 						if !ok {
-							panic("selected expr must have been registered, shouldn't be here")
+							// panic("selected expr must have been registered, shouldn't be here")
+							scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID())
 						}
 						item.Insert(scalarUniqueID)
 					default:
@@ -1527,6 +1545,10 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 					}
 					// Rule #2
 					baseCols := expression.ExtractColumns(odrItem.Expr)
+					if len(baseCols) == 0 {
+						// order by item has no reference of base col, like order by abs(1) and rand()
+						continue
+					}
 					colSet := fd.NewFastIntSet()
 					for _, col := range baseCols {
 						colSet.Insert(int(col.UniqueID))
@@ -4123,7 +4145,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 	var oldLen int
 	// According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html,
 	// we can only process window functions after having clause, so `considerWindow` is false now.
-	p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, false, sel.OrderBy != nil)
+	p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil)
 	if err != nil {
 		return nil, err
 	}
@@ -4161,7 +4183,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		// In such case plan `p` is not changed, so we don't have to build another projection.
 		if hasWindowFuncField {
 			// Now we build the window function fields.
-			p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, true, false)
+			p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false)
 			if err != nil {
 				return nil, err
 			}
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index fe869760202cd..04d9ae146542d 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -15,6 +15,7 @@
 package core
 
 import (
+	"fmt"
 	"math"
 
 	"github.com/pingcap/tidb/expression"
@@ -198,6 +199,11 @@ func (p *LogicalJoin) extractFDForSemiJoin(filtersFromApply []expression.Express
 	// 1: since semi join will keep the part or all rows of the outer table, it's outer FD can be saved.
 	// 2: the un-projected column will be left for the upper layer projection or already be pruned from bottom up.
 	outerFD, _ := p.children[0].ExtractFD(), p.children[1].ExtractFD()
+	outerAcrossBlock := p.SelectBlockOffset() != p.children[0].SelectBlockOffset()
+	if outerAcrossBlock {
+		outerFD.HasAggBuilt = false
+		outerFD.GroupByCols.Clear()
+	}
 	fds := outerFD
 
 	eqCondSlice := expression.ScalarFuncs2Exprs(p.EqualConditions)
@@ -215,6 +221,11 @@ func (p *LogicalJoin) extractFDForSemiJoin(filtersFromApply []expression.Express
 
 func (p *LogicalJoin) extractFDForInnerJoin(filtersFromApply []expression.Expression) *fd.FDSet {
 	leftFD, rightFD := p.children[0].ExtractFD(), p.children[1].ExtractFD()
+	leftAcrossBlock, rightAcrossBlock := p.SelectBlockOffset() != p.children[0].SelectBlockOffset(), p.SelectBlockOffset() != p.children[1].SelectBlockOffset()
+	if leftAcrossBlock {
+		leftFD.HasAggBuilt = false
+		leftFD.GroupByCols.Clear()
+	}
 	fds := leftFD
 	fds.MakeCartesianProduct(rightFD)
 
@@ -245,16 +256,19 @@ func (p *LogicalJoin) extractFDForInnerJoin(filtersFromApply []expression.Expres
 			fds.HashCodeToUniqueID[k] = v
 		}
 	}
-	for i, ok := rightFD.GroupByCols.Next(0); ok; i, ok = rightFD.GroupByCols.Next(i + 1) {
-		fds.GroupByCols.Insert(i)
+	if !rightAcrossBlock {
+		for i, ok := rightFD.GroupByCols.Next(0); ok; i, ok = rightFD.GroupByCols.Next(i + 1) {
+			fds.GroupByCols.Insert(i)
+		}
+		fds.HasAggBuilt = fds.HasAggBuilt || rightFD.HasAggBuilt
 	}
-	fds.HasAggBuilt = fds.HasAggBuilt || rightFD.HasAggBuilt
 	p.fdSet = fds
 	return fds
 }
 
 func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expression) *fd.FDSet {
 	outerFD, innerFD := p.children[0].ExtractFD(), p.children[1].ExtractFD()
+	outerAcrossBlock, innerAcrossBlock := p.SelectBlockOffset() != p.children[0].SelectBlockOffset(), p.SelectBlockOffset() != p.children[1].SelectBlockOffset()
 	innerCondition := p.RightConditions
 	outerCondition := p.LeftConditions
 	outerCols, innerCols := fd.NewFastIntSet(), fd.NewFastIntSet()
@@ -266,6 +280,7 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres
 	}
 	if p.JoinType == RightOuterJoin {
 		innerFD, outerFD = outerFD, innerFD
+		outerAcrossBlock, innerAcrossBlock = innerAcrossBlock, outerAcrossBlock
 		innerCondition = p.LeftConditions
 		outerCondition = p.RightConditions
 		innerCols, outerCols = outerCols, innerCols
@@ -346,9 +361,12 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres
 			}
 		}
 	}
-
+	if outerAcrossBlock {
+		outerFD.HasAggBuilt = false
+		outerFD.GroupByCols.Clear()
+	}
 	fds := outerFD
-	fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt)
+	fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt, innerAcrossBlock)
 	p.fdSet = fds
 	return fds
 }
@@ -966,7 +984,7 @@ func (p *LogicalSelection) ExtractFD() *fd.FDSet {
 	// join's schema will miss t2.a while join.full schema has. since selection
 	// itself doesn't contain schema, extracting schema should tell them apart.
 	var columns []*expression.Column
-	if join, ok := p.children[0].(*LogicalJoin); ok {
+	if join, ok := p.children[0].(*LogicalJoin); ok && join.fullSchema != nil {
 		columns = join.fullSchema.Columns
 	} else {
 		columns = p.Schema().Columns
@@ -1059,11 +1077,30 @@ func (la *LogicalApply) ExtractFD() *fd.FDSet {
 			}
 		}
 	}
+	// select (select t1.a from t1 where t1.rid = t2.id), count(t2.b) from t2 group by (t2.id)
+	// for correlated scalar sub-query, the whole sub-query will be projected as a new column for example X here.
+	// while for every same t2.id, this sub-query's scalar output must be the same, actually it's a kind of strict FD here.
+	applyStrictDetermine := fd.NewFastIntSet()
+	applyStrictDependency := fd.NewFastIntSet()
+	if innerPlan.Schema().Len() == 1 {
+		// single column in apply join inner side will be output directly.
+		for _, cc := range deduplicateCorrelatedCols {
+			applyStrictDetermine.Insert(int(cc.UniqueID))
+		}
+		applyStrictDependency.Insert(int(innerPlan.Schema().Columns[0].UniqueID))
+
+	} else {
+		// multi columns will be wrapped as a new row func, actually errors like: Operand should contain 1 column(s)
+	}
+
 	switch la.JoinType {
 	case InnerJoin:
 		return la.extractFDForInnerJoin(eqCond)
 	case LeftOuterJoin, RightOuterJoin:
-		return la.extractFDForOuterJoin(eqCond)
+		fds := la.extractFDForOuterJoin(eqCond)
+		fds.AddStrictFunctionalDependency(applyStrictDetermine, applyStrictDependency)
+		fmt.Println("cao: from", applyStrictDetermine.String(), "to", applyStrictDependency)
+		return fds
 	case SemiJoin:
 		return la.extractFDForSemiJoin(eqCond)
 	default:
diff --git a/planner/core/plan.go b/planner/core/plan.go
index 44cb793862800..216dc4c3c1654 100644
--- a/planner/core/plan.go
+++ b/planner/core/plan.go
@@ -386,8 +386,13 @@ func (p *baseLogicalPlan) ExtractFD() *fd.FDSet {
 		return p.fdSet
 	}
 	fds := &fd.FDSet{HashCodeToUniqueID: make(map[string]int)}
+	// isolation between different logical selection blocks.
+	acrossBlock := false
 	for _, ch := range p.children {
-		fds.AddFrom(ch.ExtractFD())
+		if p.SelectBlockOffset() != ch.SelectBlockOffset() {
+			acrossBlock = true
+		}
+		fds.AddFrom(ch.ExtractFD(), acrossBlock)
 	}
 	return fds
 }
diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go
index 22c90bcab4411..243f2eb4f8043 100644
--- a/planner/funcdep/fd_graph.go
+++ b/planner/funcdep/fd_graph.go
@@ -57,6 +57,7 @@ type FDSet struct {
 	//
 	// why not just makeNotNull of them, because even a non-equiv-related inner col can also
 	// refuse supplied null values.
+	// todo: when multi join and across select block, this may need to be maintained more precisely.
 	Rule333Equiv struct {
 		Edges     []*fdEdge
 		InnerCols FastIntSet
@@ -672,14 +673,14 @@ func (s *FDSet) MakeCartesianProduct(rhs *FDSet) {
 //      - If the right side has no row, we would supply null-extended rows, then the value of any column is NULL, the equivalence class exists.
 //      - If the right side has rows, no row is filtered out after the filters since no row of the outer side is filtered out. Hence, the equivalence class is still remained.
 //
-func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols FastIntSet, opt *ArgOpts) {
+func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols FastIntSet, opt *ArgOpts, innerAcrossBlock bool) {
 	//  copy down the left PK and right PK before the s has changed for later usage.
 	leftPK, ok1 := s.FindPrimaryKey()
 	rightPK, ok2 := innerFDs.FindPrimaryKey()
 	copyLeftFDSet := &FDSet{}
-	copyLeftFDSet.AddFrom(s)
+	copyLeftFDSet.AddFrom(s, false)
 	copyRightFDSet := &FDSet{}
-	copyRightFDSet.AddFrom(innerFDs)
+	copyRightFDSet.AddFrom(innerFDs, false)
 
 	for _, edge := range innerFDs.fdEdges {
 		// Rule #2.2, constant FD are removed from right side of left join.
@@ -816,10 +817,12 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F
 			s.HashCodeToUniqueID[k] = v
 		}
 	}
-	for i, ok := innerFDs.GroupByCols.Next(0); ok; i, ok = innerFDs.GroupByCols.Next(i + 1) {
-		s.GroupByCols.Insert(i)
+	if !innerAcrossBlock {
+		for i, ok := innerFDs.GroupByCols.Next(0); ok; i, ok = innerFDs.GroupByCols.Next(i + 1) {
+			s.GroupByCols.Insert(i)
+		}
+		s.HasAggBuilt = s.HasAggBuilt || innerFDs.HasAggBuilt
 	}
-	s.HasAggBuilt = s.HasAggBuilt || innerFDs.HasAggBuilt
 }
 
 func (s *FDSet) MakeRestoreRule333() {
@@ -879,7 +882,7 @@ func (s FDSet) AllCols() FastIntSet {
 // AddFrom merges two FD sets by adding each FD from the given set to this set.
 // Since two different tables may have some column ID overlap, we better use
 // column unique ID to build the FDSet instead.
-func (s *FDSet) AddFrom(fds *FDSet) {
+func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) {
 	for i := range fds.fdEdges {
 		fd := fds.fdEdges[i]
 		if fd.equiv {
@@ -903,10 +906,12 @@ func (s *FDSet) AddFrom(fds *FDSet) {
 			s.HashCodeToUniqueID[k] = v
 		}
 	}
-	for i, ok := fds.GroupByCols.Next(0); ok; i, ok = fds.GroupByCols.Next(i + 1) {
-		s.GroupByCols.Insert(i)
+	if !acrossBlock {
+		for i, ok := fds.GroupByCols.Next(0); ok; i, ok = fds.GroupByCols.Next(i + 1) {
+			s.GroupByCols.Insert(i)
+		}
+		s.HasAggBuilt = fds.HasAggBuilt
 	}
-	s.HasAggBuilt = fds.HasAggBuilt
 	s.Rule333Equiv = fds.Rule333Equiv
 }
 

From e2f712f20e3f18af577be686b8204820f80b668d Mon Sep 17 00:00:00 2001
From: ailinkid <314806019@qq.com>
Date: Wed, 23 Mar 2022 17:18:34 +0800
Subject: [PATCH 04/13] fix order by should see new p.schema after order item
 is built

Signed-off-by: ailinkid <314806019@qq.com>
---
 planner/core/logical_plan_builder.go | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index c56547df032c8..f6be15ad021a1 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1321,6 +1321,8 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 		}
 	}
 	proj.SetChildren(p)
+	var resultP LogicalPlan
+	resultP = proj
 	// delay the only-full-group-by-check in create view statement to later query.
 	var orderByItemExprs []*util.ByItems
 	if !b.isCreateView {
@@ -1438,8 +1440,11 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 				default:
 				}
 			}
+			if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT SUM(a) FROM t1  ORDER BY (SELECT") {
+				fmt.Println(1)
+			}
 			// check only-full-group-by 4 order-by clause with group-by clause
-			orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
+			resultP, orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
 			if err != nil {
 				return nil, nil, 0, err
 			}
@@ -1586,7 +1591,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 			}
 		}
 	}
-	return proj, orderByItemExprs, oldLen, nil
+	return resultP, orderByItemExprs, oldLen, nil
 }
 
 func (b *PlanBuilder) buildDistinct(child LogicalPlan, length int) (*LogicalAggregation, error) {
@@ -1986,7 +1991,7 @@ func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*a
 }
 
 func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int,
-	windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) ([]*util.ByItems, error) {
+	windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) (LogicalPlan, []*util.ByItems, error) {
 	exprs := make([]*util.ByItems, 0, len(byItems))
 	transformer := &itemTransformer{}
 	for _, item := range byItems {
@@ -1994,7 +1999,7 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt
 		item.Expr = newExpr.(ast.ExprNode)
 		it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil)
 		if err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 
 		// check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442
@@ -2008,7 +2013,7 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt
 		p = np
 		exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc})
 	}
-	return exprs, nil
+	return p, exprs, nil
 }
 
 func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int,
@@ -3368,7 +3373,7 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 	} else {
 		// check only-full-group-by 4 order-by clause without group-by clause
 		// Step1: judge whether this query is an aggregated query.
-		// Step2: there is a agg in order by while sql is not an aggregated query, errors.
+		// Step2: there is an agg in order by while sql is not an aggregated query, errors.
 		// Since here doesn't require FD to check, this can take place as soon as possible.
 		// Step1:
 		isNonAggregatedQuery := false

From 20e1a234539938592dd52a2eca6761eb71c2e71c Mon Sep 17 00:00:00 2001
From: ailinkid <314806019@qq.com>
Date: Thu, 24 Mar 2022 17:18:22 +0800
Subject: [PATCH 05/13] fix corrleated column in order by clause can't be
 name-resolved

Signed-off-by: ailinkid <314806019@qq.com>
---
 planner/core/logical_plan_builder.go       | 82 ++++++++++++++++++----
 planner/funcdep/fd_graph_ported_test.go    |  2 +-
 planner/funcdep/only_full_group_by_test.go | 14 ++++
 3 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index f6be15ad021a1..0d52e583e1b61 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1440,9 +1440,6 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 				default:
 				}
 			}
-			if strings.HasPrefix(b.ctx.GetSessionVars().StmtCtx.OriginalSQL, "SELECT SUM(a) FROM t1  ORDER BY (SELECT") {
-				fmt.Println(1)
-			}
 			// check only-full-group-by 4 order-by clause with group-by clause
 			resultP, orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
 			if err != nil {
@@ -3377,38 +3374,60 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 		// Since here doesn't require FD to check, this can take place as soon as possible.
 		// Step1:
 		isNonAggregatedQuery := false
-		resolver := colResolverForOnlyFullGroupBy{
+		resolver1 := colResolverForOnlyFullGroupBy{
 			firstOrderByAggColIdx: -1,
 		}
-		resolver.curClause = fieldList
+		resolver1.curClause = fieldList
 		for idx, field := range sel.Fields.Fields {
-			resolver.exprIdx = idx
-			field.Accept(&resolver)
+			resolver1.exprIdx = idx
+			field.Accept(&resolver1)
 		}
 		if sel.Having != nil {
-			sel.Having.Expr.Accept(&resolver)
+			sel.Having.Expr.Accept(&resolver1)
 		}
-		if !resolver.hasAggFunc {
+		if !resolver1.hasAggFunc {
 			isNonAggregatedQuery = true
 		}
 
 		// Step2:
 		if sel.OrderBy != nil && isNonAggregatedQuery {
+			// Temporarily solve the case like: SELECT a FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2);
+			// We need to detect the correlated agg in order by clause, and report error in the outer query.
+			resolver2 := &correlatedAggregateResolver{
+				ctx:                ctx,
+				b:                  b,
+				outerPlan:          p,
+				correlatedAggFuncs: make([]*ast.AggregateFuncExpr, 0, len(sel.OrderBy.Items)),
+			}
 			// YES: SQL like `select a from t where a = 1 order by count(b)` is illegal.
 			// NO: SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b)) is ok.
 			// judge whether the aggregated func in order by item is referred to outer schema.
-			resolver.curClause = orderByClause
-			resolver.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items))
+			resolver1.curClause = orderByClause
+			resolver1.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items))
 			for idx, byItem := range sel.OrderBy.Items {
-				resolver.exprIdx = idx
-				byItem.Expr.Accept(&resolver)
+				resolver1.exprIdx = idx
+				byItem.Expr.Accept(&resolver1)
+
+				// detect the correlated agg in sub query.
+				_, ok := byItem.Expr.Accept(resolver2)
+				if !ok {
+					return resolver2.err
+				}
+				if len(resolver2.correlatedAggFuncs) > 0 {
+					for _, one := range resolver2.correlatedAggFuncs {
+						if _, ok := resolver1.orderByAggMap[one]; !ok {
+							resolver1.orderByAggMap[one] = idx
+						}
+					}
+					resolver2.correlatedAggFuncs = resolver2.correlatedAggFuncs[:0]
+				}
 			}
 			// has no order-by agg.
-			if len(resolver.orderByAggMap) == 0 {
+			if len(resolver1.orderByAggMap) == 0 {
 				return nil
 			}
 			// has some order-by agg.
-			for k, v := range resolver.orderByAggMap {
+			for k, v := range resolver1.orderByAggMap {
 				var cols []*expression.Column
 				for _, arg := range k.Args {
 					newArg, np, err := b.rewrite(ctx, arg, p, nil, true)
@@ -3426,6 +3445,39 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 				return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1)
 			}
 		}
+		// we need extract all current-level-correlated columns from order by sub-query item to select fields if it has.
+		if sel.OrderBy != nil {
+			for _, byItem := range sel.OrderBy.Items {
+				if _, ok := byItem.Expr.(*ast.SubqueryExpr); ok {
+					// correlated agg will be extracted completely latter.
+					_, np, err := b.rewrite(ctx, byItem.Expr, p, nil, true)
+					if err != nil {
+						return err
+					}
+					correlatedCols := ExtractCorrelatedCols4LogicalPlan(np)
+					for _, cone := range correlatedCols {
+						for idx, pone := range p.Schema().Columns {
+							if cone.UniqueID == pone.UniqueID {
+								pname := p.OutputNames()[idx]
+								colName := &ast.ColumnName{
+									Schema: pname.DBName,
+									Table:  pname.TblName,
+									Name:   pname.ColName,
+								}
+								sel.Fields.Fields = append(sel.Fields.Fields, &ast.SelectField{
+									Auxiliary:         true,
+									AuxiliaryColInAgg: true,
+									Expr:              &ast.ColumnNameExpr{Name: colName},
+								})
+							}
+						}
+					}
+					// p = np, don't construct apply and substitute p here once there is a sub-query
+					// Because here is just like a pre-procession of pulling correlated column from sub-query to outer select fields.
+					// don't change the basic plan
+				}
+			}
+		}
 	}
 	return nil
 }
diff --git a/planner/funcdep/fd_graph_ported_test.go b/planner/funcdep/fd_graph_ported_test.go
index 3ac8371e98c7f..939617e62d128 100644
--- a/planner/funcdep/fd_graph_ported_test.go
+++ b/planner/funcdep/fd_graph_ported_test.go
@@ -65,7 +65,7 @@ func TestFuncDeps_ColsAreKey(t *testing.T) {
 	loj = *abcde
 	loj.MakeCartesianProduct(mnpq)
 	loj.AddConstants(NewFastIntSet(3))
-	loj.MakeOuterJoin(&FDSet{}, &FDSet{}, preservedCols, nullExtendedCols, nil)
+	loj.MakeOuterJoin(&FDSet{}, &FDSet{}, preservedCols, nullExtendedCols, nil, false)
 	loj.AddEquivalence(NewFastIntSet(1), NewFastIntSet(10))
 
 	testcases := []struct {
diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go
index b519436beabff..4749b29786e97 100644
--- a/planner/funcdep/only_full_group_by_test.go
+++ b/planner/funcdep/only_full_group_by_test.go
@@ -230,4 +230,18 @@ func TestOnlyFullGroupByOldCases(t *testing.T) {
 	err = tk.ExecToErr("select b from t1 group by a")
 	require.NotNil(t, err)
 	require.Equal(t, err.Error(), "[planner:1055]Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'test.t1.b' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by")
+
+	// fix issue 30025
+	tk.MustExec("drop table if exists t1,t2;")
+	tk.MustExec("CREATE TABLE t1(a INTEGER);")
+	tk.MustExec("INSERT INTO t1 VALUES (1), (2);")
+	err = tk.ExecToErr("SELECT a FROM t1 ORDER BY (SELECT COUNT(t1.a) FROM t1 AS t2);")
+	require.NotNil(t, err)
+	require.Equal(t, err.Error(), "[planner:3029]Expression #1 of ORDER BY contains aggregate function and applies to the result of a non-aggregated query")
+	// from sql standard and what postgres does, this query should report more-than-one-row error in execution phase.
+	// but mysql has done a special check for it, since outer query has exactly only one row, group by clause can be eliminated (so order-by scalar sub-query check is bypassed)
+	err = tk.QueryToErr("SELECT SUM(a) FROM t1 ORDER BY (SELECT COUNT(t1.a) FROM t1 AS t2);")
+	require.NotNil(t, err)
+	require.Equal(t, err.Error(), "[executor:1242]Subquery returns more than 1 row")
+	tk.MustQuery("SELECT SUM(a) FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2);")
 }

From 13da49a11da6c8c3ea411ef290080fff2ec420c3 Mon Sep 17 00:00:00 2001
From: ailinkid <314806019@qq.com>
Date: Thu, 24 Mar 2022 20:03:35 +0800
Subject: [PATCH 06/13] move order by check logical to buildOrderBy

Signed-off-by: ailinkid <314806019@qq.com>
---
 planner/core/logical_plan_builder.go | 435 ++++++++++++---------------
 planner/core/logical_plans.go        |  16 +-
 planner/core/plan.go                 |   3 +-
 3 files changed, 205 insertions(+), 249 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index 0d52e583e1b61..a0ffeb5128e24 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1236,11 +1236,11 @@ func findColFromNaturalUsingJoin(p LogicalPlan, col *expression.Column) (name *t
 
 // buildProjection returns a Projection plan and non-aux columns length.
 func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, mapper map[*ast.AggregateFuncExpr]int,
-	windowMapper map[*ast.WindowFuncExpr]int, orderMapper map[*ast.AggregateFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, []*util.ByItems, int, error) {
+	windowMapper map[*ast.WindowFuncExpr]int, considerWindow bool, expandGenerateColumn bool) (LogicalPlan, int, error) {
 	fields := sel.Fields.Fields
 	err := b.preprocessUserVarTypes(ctx, p, fields, mapper)
 	if err != nil {
-		return nil, nil, 0, err
+		return nil, 0, err
 	}
 	b.optFlag |= flagEliminateProjection
 	b.curClause = fieldList
@@ -1270,7 +1270,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 			proj.Exprs = append(proj.Exprs, expr)
 			col, name, err := b.buildProjectionField(ctx, p, field, expr)
 			if err != nil {
-				return nil, nil, 0, err
+				return nil, 0, err
 			}
 			schema.Append(col)
 			newNames = append(newNames, name)
@@ -1278,7 +1278,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 		}
 		newExpr, np, err := b.rewriteWithPreprocess(ctx, field.Expr, p, mapper, windowMapper, true, nil)
 		if err != nil {
-			return nil, nil, 0, err
+			return nil, 0, err
 		}
 
 		// For window functions in the order by clause, we will append an field for it.
@@ -1294,7 +1294,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 
 		col, name, err := b.buildProjectionField(ctx, p, field, newExpr)
 		if err != nil {
-			return nil, nil, 0, err
+			return nil, 0, err
 		}
 		schema.Append(col)
 		newNames = append(newNames, name)
@@ -1321,10 +1321,7 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 		}
 	}
 	proj.SetChildren(p)
-	var resultP LogicalPlan
-	resultP = proj
 	// delay the only-full-group-by-check in create view statement to later query.
-	var orderByItemExprs []*util.ByItems
 	if !b.isCreateView {
 		fds := proj.ExtractFD()
 		// Projection -> Children -> ...
@@ -1400,195 +1397,15 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, sel *a
 				}
 				// Only1Zero is to judge whether it's no-group-by-items case.
 				if !fds.GroupByCols.Only1Zero() {
-					return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name)
+					return nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name)
 				} else {
-					return nil, nil, 0, ErrMixOfGroupFuncAndFields.GenWithStackByArgs(offset+1, name)
-				}
-			}
-			if fds.GroupByCols.Only1Zero() {
-				// maxOneRow is delayed from agg's ExtractFD logic since some details listed in it.
-				projectionUniqueIDs := fd.NewFastIntSet()
-				for _, expr := range proj.Exprs {
-					switch x := expr.(type) {
-					case *expression.Column:
-						projectionUniqueIDs.Insert(int(x.UniqueID))
-					case *expression.ScalarFunction:
-						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
-						if !ok {
-							panic("selected expr must have been registered, shouldn't be here")
-						}
-						projectionUniqueIDs.Insert(scalarUniqueID)
-					}
-				}
-				fds.MaxOneRow(projectionUniqueIDs)
-			}
-			// for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag.
-			fds.HasAggBuilt = false
-		}
-		if sel.OrderBy != nil {
-			selectExprsUniqueIDs := fd.NewFastIntSet()
-			for _, expr := range proj.Exprs[:oldLen] {
-				switch x := expr.(type) {
-				case *expression.Column:
-					selectExprsUniqueIDs.Insert(int(x.UniqueID))
-				case *expression.ScalarFunction:
-					scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
-					if !ok {
-						panic("selected expr must have been registered, shouldn't be here")
-					}
-					selectExprsUniqueIDs.Insert(scalarUniqueID)
-				default:
-				}
-			}
-			// check only-full-group-by 4 order-by clause with group-by clause
-			resultP, orderByItemExprs, err = b.buildOrderByItems(ctx, proj, sel.OrderBy.Items, orderMapper, windowMapper, proj.Exprs, oldLen, sel.Distinct)
-			if err != nil {
-				return nil, nil, 0, err
-			}
-			if sel.GroupBy != nil {
-				for offset, odrItem := range orderByItemExprs {
-					item := fd.NewFastIntSet()
-					switch x := odrItem.Expr.(type) {
-					case *expression.Column:
-						item.Insert(int(x.UniqueID))
-					case *expression.ScalarFunction:
-						// order by item may not be projected as a column in projection, allocated a new one
-						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
-						if !ok {
-							// panic("selected expr must have been registered, shouldn't be here")
-							scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID())
-						}
-						item.Insert(scalarUniqueID)
-					case *expression.Constant:
-						// order by null/false/true can always be ok, let item be empty.
-					default:
-					}
-					// Step#1: whether order by item is in the origin select field list
-					if item.SubsetOf(selectExprsUniqueIDs) {
-						continue
-					}
-					// Step#2: whether order by item is in the group by list
-					if item.SubsetOf(fds.GroupByCols) {
-						continue
-					}
-					// Step#3: whether order by item is in the FD closure of group-by & select list items.
-					if item.SubsetOf(fds.ConstantCols()) {
-						continue
-					}
-					strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols)
-					if item.SubsetOf(strictClosureOfGroupByCols) {
-						continue
-					}
-					strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs)
-					if item.SubsetOf(strictClosureOfSelectCols) {
-						continue
-					}
-					// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
-					baseCols := expression.ExtractColumns(odrItem.Expr)
-					if len(baseCols) == 0 {
-						// order by item has no reference of base col, like order by abs(1) and rand()
-						continue
-					}
-					// GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely.
-					baseColsUniqueIDs := fd.NewFastIntSet()
-					for _, bc := range baseCols {
-						baseColsUniqueIDs.Insert(int(bc.UniqueID))
-					}
-					if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) {
-						continue
-					}
-					if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) {
-						continue
-					}
-					errShowCol := baseCols[0]
-					for _, col := range baseCols {
-						colSet := fd.NewFastIntSet(int(col.UniqueID))
-						if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) {
-							errShowCol = col
-							break
-						}
-					}
-					// better use the schema alias name firstly if any.
-					name := ""
-					for idx, schemaCol := range proj.Schema().Columns {
-						if schemaCol.UniqueID == errShowCol.UniqueID {
-							name = proj.names[idx].String()
-							break
-						}
-					}
-					if name == "" {
-						name = errShowCol.OrigName
-					}
-					return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name)
-				}
-			}
-			if sel.Distinct {
-				// order-by with distinct case
-				// Rule #1: order by item should be in the select filed list
-				// Rule #2: the base col that order by item dependent on should be in the select field list
-				for offset, odrItem := range orderByItemExprs {
-					item := fd.NewFastIntSet()
-					switch x := odrItem.Expr.(type) {
-					case *expression.Column:
-						item.Insert(int(x.UniqueID))
-					case *expression.ScalarFunction:
-						// order by item may not be projected as a column in projection, allocated a new one
-						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
-						if !ok {
-							// panic("selected expr must have been registered, shouldn't be here")
-							scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID())
-						}
-						item.Insert(scalarUniqueID)
-					default:
-					}
-					// Rule #1
-					if item.SubsetOf(selectExprsUniqueIDs) {
-						continue
-					}
-					// Rule #2
-					baseCols := expression.ExtractColumns(odrItem.Expr)
-					if len(baseCols) == 0 {
-						// order by item has no reference of base col, like order by abs(1) and rand()
-						continue
-					}
-					colSet := fd.NewFastIntSet()
-					for _, col := range baseCols {
-						colSet.Insert(int(col.UniqueID))
-					}
-					if colSet.SubsetOf(selectExprsUniqueIDs) {
-						continue
-					}
-					// find that error base col
-					errShowCol := baseCols[0]
-					for _, col := range baseCols {
-						colSet := fd.NewFastIntSet()
-						colSet.Insert(int(col.UniqueID))
-						if !colSet.SubsetOf(selectExprsUniqueIDs) {
-							errShowCol = col
-							break
-						}
-					}
-					// better use the schema alias name firstly if any.
-					name := ""
-					for idx, schemaCol := range proj.Schema().Columns {
-						if schemaCol.UniqueID == errShowCol.UniqueID {
-							name = proj.names[idx].String()
-							break
-						}
-					}
-					if name == "" {
-						name = errShowCol.OrigName
-					}
-					if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok {
-						return nil, nil, 0, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT")
-					}
-					// select distinct count(a) from t group by b order by sum(a);          ✗
-					return nil, nil, 0, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT")
+					return nil, 0, ErrMixOfGroupFuncAndFields.GenWithStackByArgs(offset+1, name)
 				}
 			}
 		}
+		proj.FDChecked = true
 	}
-	return resultP, orderByItemExprs, oldLen, nil
+	return proj, oldLen, nil
 }
 
 func (b *PlanBuilder) buildDistinct(child LogicalPlan, length int) (*LogicalAggregation, error) {
@@ -1983,12 +1800,8 @@ func (t *itemTransformer) Leave(inNode ast.Node) (ast.Node, bool) {
 	return inNode, false
 }
 
-func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int) (*LogicalSort, error) {
-	return b.buildSortWithCheck(ctx, p, byItems, aggMapper, windowMapper, nil, 0, false)
-}
-
 func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int,
-	windowMapper map[*ast.WindowFuncExpr]int, projExprs []expression.Expression, oldLen int, hasDistinct bool) (LogicalPlan, []*util.ByItems, error) {
+	windowMapper map[*ast.WindowFuncExpr]int) (LogicalPlan, []*util.ByItems, error) {
 	exprs := make([]*util.ByItems, 0, len(byItems))
 	transformer := &itemTransformer{}
 	for _, item := range byItems {
@@ -1998,65 +1811,203 @@ func (b *PlanBuilder) buildOrderByItems(ctx context.Context, p LogicalPlan, byIt
 		if err != nil {
 			return nil, nil, err
 		}
-
-		// check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442
-		//if hasDistinct && projExprs != nil {
-		//	err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen)
-		//	if err != nil {
-		//		return nil, err
-		//	}
-		//}
-
 		p = np
 		exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc})
 	}
 	return p, exprs, nil
 }
 
-func (b *PlanBuilder) buildSortWithCheck(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int,
-	projExprs []expression.Expression, oldLen int, hasDistinct bool) (*LogicalSort, error) {
+func (b *PlanBuilder) buildSort(ctx context.Context, p LogicalPlan, byItems []*ast.ByItem, aggMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int) (*LogicalSort, error) {
 	if _, isUnion := p.(*LogicalUnionAll); isUnion {
 		b.curClause = globalOrderByClause
 	} else {
 		b.curClause = orderByClause
 	}
-	sort := LogicalSort{}.Init(b.ctx, b.getSelectOffset())
-	exprs := make([]*util.ByItems, 0, len(byItems))
-	transformer := &itemTransformer{}
-	for i, item := range byItems {
-		newExpr, _ := item.Expr.Accept(transformer)
-		item.Expr = newExpr.(ast.ExprNode)
-		it, np, err := b.rewriteWithPreprocess(ctx, item.Expr, p, aggMapper, windowMapper, true, nil)
-		if err != nil {
-			return nil, err
-		}
-
-		// check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442
-		if hasDistinct && projExprs != nil {
-			err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen)
-			if err != nil {
-				return nil, err
-			}
-		}
-
-		p = np
-		exprs = append(exprs, &util.ByItems{Expr: it, Desc: item.Desc})
+	pSort := LogicalSort{}.Init(b.ctx, b.getSelectOffset())
+	p, orderByItemExprs, err := b.buildOrderByItems(ctx, p, byItems, aggMapper, windowMapper)
+	if err != nil {
+		return nil, err
 	}
-	sort.ByItems = exprs
-	sort.SetChildren(p)
-	return sort, nil
+	pSort.ByItems = orderByItemExprs
+	pSort.SetChildren(p)
+	return pSort, nil
 }
 
-func (b *PlanBuilder) buildSortWithCheckV2(p LogicalPlan, byItems []*util.ByItems) (*LogicalSort, error) {
+func (b *PlanBuilder) buildSortCheck(ctx context.Context, p LogicalPlan, sel *ast.SelectStmt, proj *LogicalProjection, oldLen int, orderMapper map[*ast.AggregateFuncExpr]int, windowMapper map[*ast.WindowFuncExpr]int) (*LogicalSort, error) {
 	if _, isUnion := p.(*LogicalUnionAll); isUnion {
 		b.curClause = globalOrderByClause
 	} else {
 		b.curClause = orderByClause
 	}
-	sort := LogicalSort{}.Init(b.ctx, b.getSelectOffset())
-	sort.ByItems = byItems
-	sort.SetChildren(p)
-	return sort, nil
+	pSort := LogicalSort{}.Init(b.ctx, b.getSelectOffset())
+	p, orderByItemExprs, err := b.buildOrderByItems(ctx, p, sel.OrderBy.Items, orderMapper, windowMapper)
+	if err != nil {
+		return nil, err
+	}
+	if !b.isCreateView {
+		// check only-full-group-by 4 order-by clause with group-by clause
+		fds := proj.ExtractFD()
+		if sel.OrderBy != nil {
+			selectExprsUniqueIDs := fd.NewFastIntSet()
+			for _, expr := range proj.Exprs[:oldLen] {
+				switch x := expr.(type) {
+				case *expression.Column:
+					selectExprsUniqueIDs.Insert(int(x.UniqueID))
+				case *expression.ScalarFunction:
+					scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+					if !ok {
+						panic("selected expr must have been registered, shouldn't be here")
+					}
+					selectExprsUniqueIDs.Insert(scalarUniqueID)
+				default:
+				}
+			}
+			if sel.GroupBy != nil {
+				for offset, odrItem := range orderByItemExprs {
+					item := fd.NewFastIntSet()
+					switch x := odrItem.Expr.(type) {
+					case *expression.Column:
+						item.Insert(int(x.UniqueID))
+					case *expression.ScalarFunction:
+						// order by item may not be projected as a column in projection, allocated a new one
+						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+						if !ok {
+							// panic("selected expr must have been registered, shouldn't be here")
+							scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID())
+						}
+						item.Insert(scalarUniqueID)
+					case *expression.Constant:
+						// order by null/false/true can always be ok, let item be empty.
+					default:
+					}
+					// Step#1: whether order by item is in the origin select field list
+					if item.SubsetOf(selectExprsUniqueIDs) {
+						continue
+					}
+					// Step#2: whether order by item is in the group by list
+					if item.SubsetOf(fds.GroupByCols) {
+						continue
+					}
+					// Step#3: whether order by item is in the FD closure of group-by & select list items.
+					if item.SubsetOf(fds.ConstantCols()) {
+						continue
+					}
+					strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols)
+					if item.SubsetOf(strictClosureOfGroupByCols) {
+						continue
+					}
+					strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs)
+					if item.SubsetOf(strictClosureOfSelectCols) {
+						continue
+					}
+					// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
+					baseCols := expression.ExtractColumns(odrItem.Expr)
+					if len(baseCols) == 0 {
+						// order by item has no reference of base col, like order by abs(1) and rand()
+						continue
+					}
+					// GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely.
+					baseColsUniqueIDs := fd.NewFastIntSet()
+					for _, bc := range baseCols {
+						baseColsUniqueIDs.Insert(int(bc.UniqueID))
+					}
+					if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) {
+						continue
+					}
+					if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) {
+						continue
+					}
+					errShowCol := baseCols[0]
+					for _, col := range baseCols {
+						colSet := fd.NewFastIntSet(int(col.UniqueID))
+						if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) {
+							errShowCol = col
+							break
+						}
+					}
+					// better use the schema alias name firstly if any.
+					name := ""
+					for idx, schemaCol := range proj.Schema().Columns {
+						if schemaCol.UniqueID == errShowCol.UniqueID {
+							name = proj.names[idx].String()
+							break
+						}
+					}
+					if name == "" {
+						name = errShowCol.OrigName
+					}
+					return nil, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name)
+				}
+			}
+			if sel.Distinct {
+				// order-by with distinct case
+				// Rule #1: order by item should be in the select filed list
+				// Rule #2: the base col that order by item dependent on should be in the select field list
+				for offset, odrItem := range orderByItemExprs {
+					item := fd.NewFastIntSet()
+					switch x := odrItem.Expr.(type) {
+					case *expression.Column:
+						item.Insert(int(x.UniqueID))
+					case *expression.ScalarFunction:
+						// order by item may not be projected as a column in projection, allocated a new one
+						scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
+						if !ok {
+							// panic("selected expr must have been registered, shouldn't be here")
+							scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID())
+						}
+						item.Insert(scalarUniqueID)
+					default:
+					}
+					// Rule #1
+					if item.SubsetOf(selectExprsUniqueIDs) {
+						continue
+					}
+					// Rule #2
+					baseCols := expression.ExtractColumns(odrItem.Expr)
+					if len(baseCols) == 0 {
+						// order by item has no reference of base col, like order by abs(1) and rand()
+						continue
+					}
+					colSet := fd.NewFastIntSet()
+					for _, col := range baseCols {
+						colSet.Insert(int(col.UniqueID))
+					}
+					if colSet.SubsetOf(selectExprsUniqueIDs) {
+						continue
+					}
+					// find that error base col
+					errShowCol := baseCols[0]
+					for _, col := range baseCols {
+						colSet := fd.NewFastIntSet()
+						colSet.Insert(int(col.UniqueID))
+						if !colSet.SubsetOf(selectExprsUniqueIDs) {
+							errShowCol = col
+							break
+						}
+					}
+					// better use the schema alias name firstly if any.
+					name := ""
+					for idx, schemaCol := range proj.Schema().Columns {
+						if schemaCol.UniqueID == errShowCol.UniqueID {
+							name = proj.names[idx].String()
+							break
+						}
+					}
+					if name == "" {
+						name = errShowCol.OrigName
+					}
+					if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok {
+						return nil, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT")
+					}
+					// select distinct count(a) from t group by b order by sum(a);          ✗
+					return nil, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT")
+				}
+			}
+		}
+	}
+	pSort.ByItems = orderByItemExprs
+	pSort.SetChildren(p)
+	return pSort, nil
 }
 
 // checkOrderByInDistinct checks whether ORDER BY has conflicts with DISTINCT, see #12442
@@ -4053,7 +4004,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		windowAggMap                  map[*ast.AggregateFuncExpr]int
 		correlatedAggMap              map[*ast.AggregateFuncExpr]int
 		gbyCols                       []expression.Expression
-		orderByItems                  []*util.ByItems
+		proj                          LogicalPlan
 	)
 
 	// set for update read to true before building result set node
@@ -4202,7 +4153,8 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 	var oldLen int
 	// According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html,
 	// we can only process window functions after having clause, so `considerWindow` is false now.
-	p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil)
+	proj, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil)
+	p = proj
 	if err != nil {
 		return nil, err
 	}
@@ -4240,7 +4192,8 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		// In such case plan `p` is not changed, so we don't have to build another projection.
 		if hasWindowFuncField {
 			// Now we build the window function fields.
-			p, orderByItems, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false)
+			proj, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false)
+			p = proj
 			if err != nil {
 				return nil, err
 			}
@@ -4256,7 +4209,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 
 	if sel.OrderBy != nil {
 		if b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy() {
-			p, err = b.buildSortWithCheckV2(p, orderByItems)
+			p, err = b.buildSortCheck(ctx, p, sel, proj.(*LogicalProjection), oldLen, orderMap, windowMapper)
 		} else {
 			p, err = b.buildSort(ctx, p, sel.OrderBy.Items, orderMap, windowMapper)
 		}
diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index 04d9ae146542d..44ae6760df5f3 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -15,7 +15,6 @@
 package core
 
 import (
-	"fmt"
 	"math"
 
 	"github.com/pingcap/tidb/expression"
@@ -596,6 +595,13 @@ func (p *LogicalProjection) ExtractFD() *fd.FDSet {
 	fds.MakeNotNull(notnullColsUniqueIDs)
 	// select max(a) from t group by b, we should project both `a` & `b` to maintain the FD down here, even if select-fields only contain `a`.
 	fds.ProjectCols(outputColsUniqueIDs.Union(fds.GroupByCols))
+	if fds.HasAggBuilt && fds.GroupByCols.Only1Zero() && p.baseLogicalPlan.FDChecked {
+		// maxOneRow is delayed from agg's ExtractFD logic since some details listed in it.
+		fds.MaxOneRow(outputColsUniqueIDs)
+		// for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag.
+		fds.HasAggBuilt = false
+		fds.GroupByCols.Clear()
+	}
 	// just trace it down in every operator for test checking.
 	p.fdSet = fds
 	return fds
@@ -1078,19 +1084,16 @@ func (la *LogicalApply) ExtractFD() *fd.FDSet {
 		}
 	}
 	// select (select t1.a from t1 where t1.rid = t2.id), count(t2.b) from t2 group by (t2.id)
-	// for correlated scalar sub-query, the whole sub-query will be projected as a new column for example X here.
+	// for correlated scalar sub-query, the whole sub-query will be projected as a new column for example here.
 	// while for every same t2.id, this sub-query's scalar output must be the same, actually it's a kind of strict FD here.
 	applyStrictDetermine := fd.NewFastIntSet()
 	applyStrictDependency := fd.NewFastIntSet()
-	if innerPlan.Schema().Len() == 1 {
+	if innerPlan.Schema().Len() == 1 && len(deduplicateCorrelatedCols) > 0 {
 		// single column in apply join inner side will be output directly.
 		for _, cc := range deduplicateCorrelatedCols {
 			applyStrictDetermine.Insert(int(cc.UniqueID))
 		}
 		applyStrictDependency.Insert(int(innerPlan.Schema().Columns[0].UniqueID))
-
-	} else {
-		// multi columns will be wrapped as a new row func, actually errors like: Operand should contain 1 column(s)
 	}
 
 	switch la.JoinType {
@@ -1099,7 +1102,6 @@ func (la *LogicalApply) ExtractFD() *fd.FDSet {
 	case LeftOuterJoin, RightOuterJoin:
 		fds := la.extractFDForOuterJoin(eqCond)
 		fds.AddStrictFunctionalDependency(applyStrictDetermine, applyStrictDependency)
-		fmt.Println("cao: from", applyStrictDetermine.String(), "to", applyStrictDependency)
 		return fds
 	case SemiJoin:
 		return la.extractFDForSemiJoin(eqCond)
diff --git a/planner/core/plan.go b/planner/core/plan.go
index 216dc4c3c1654..79c7615bb9a1c 100644
--- a/planner/core/plan.go
+++ b/planner/core/plan.go
@@ -377,7 +377,8 @@ type baseLogicalPlan struct {
 	// including eliminating unnecessary DISTINCT operators, simplifying ORDER BY columns,
 	// removing Max1Row operators, and mapping semi-joins to inner-joins.
 	// for now, it's hard to maintain in individual operator, build it from bottom up when using.
-	fdSet *fd.FDSet
+	fdSet     *fd.FDSet
+	FDChecked bool
 }
 
 // ExtractFD return the children[0]'s fdSet if there are no adding/removing fd in this logic plan.

From 6fd8035a4a4c1b17185f5e66bd3ef82809edf6f0 Mon Sep 17 00:00:00 2001
From: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
Date: Tue, 29 Mar 2022 23:29:03 +0800
Subject: [PATCH 07/13] fix order by for skipping extract correlated col

Signed-off-by: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
---
 planner/core/logical_plan_builder.go | 77 ++++++++++++++--------------
 1 file changed, 38 insertions(+), 39 deletions(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index a0ffeb5128e24..9321309bdf523 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -3325,18 +3325,18 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 		// Since here doesn't require FD to check, this can take place as soon as possible.
 		// Step1:
 		isNonAggregatedQuery := false
-		resolver1 := colResolverForOnlyFullGroupBy{
+		resolver4AggDetect := colResolverForOnlyFullGroupBy{
 			firstOrderByAggColIdx: -1,
 		}
-		resolver1.curClause = fieldList
+		resolver4AggDetect.curClause = fieldList
 		for idx, field := range sel.Fields.Fields {
-			resolver1.exprIdx = idx
-			field.Accept(&resolver1)
+			resolver4AggDetect.exprIdx = idx
+			field.Accept(&resolver4AggDetect)
 		}
 		if sel.Having != nil {
-			sel.Having.Expr.Accept(&resolver1)
+			sel.Having.Expr.Accept(&resolver4AggDetect)
 		}
-		if !resolver1.hasAggFunc {
+		if !resolver4AggDetect.hasAggFunc {
 			isNonAggregatedQuery = true
 		}
 
@@ -3344,7 +3344,7 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 		if sel.OrderBy != nil && isNonAggregatedQuery {
 			// Temporarily solve the case like: SELECT a FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2);
 			// We need to detect the correlated agg in order by clause, and report error in the outer query.
-			resolver2 := &correlatedAggregateResolver{
+			resolver4CorAggDetect := &correlatedAggregateResolver{
 				ctx:                ctx,
 				b:                  b,
 				outerPlan:          p,
@@ -3353,47 +3353,45 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 			// YES: SQL like `select a from t where a = 1 order by count(b)` is illegal.
 			// NO: SELECT t1.a FROM t1 GROUP BY t1.a HAVING t1.a IN (SELECT t2.a FROM t2 ORDER BY SUM(t1.b)) is ok.
 			// judge whether the aggregated func in order by item is referred to outer schema.
-			resolver1.curClause = orderByClause
-			resolver1.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items))
+			resolver4AggDetect.curClause = orderByClause
+			resolver4AggDetect.orderByAggMap = make(map[*ast.AggregateFuncExpr]int, len(sel.OrderBy.Items))
 			for idx, byItem := range sel.OrderBy.Items {
-				resolver1.exprIdx = idx
-				byItem.Expr.Accept(&resolver1)
+				resolver4AggDetect.exprIdx = idx
+				byItem.Expr.Accept(&resolver4AggDetect)
 
 				// detect the correlated agg in sub query.
-				_, ok := byItem.Expr.Accept(resolver2)
+				_, ok := byItem.Expr.Accept(resolver4CorAggDetect)
 				if !ok {
-					return resolver2.err
+					return resolver4CorAggDetect.err
 				}
-				if len(resolver2.correlatedAggFuncs) > 0 {
-					for _, one := range resolver2.correlatedAggFuncs {
-						if _, ok := resolver1.orderByAggMap[one]; !ok {
-							resolver1.orderByAggMap[one] = idx
+				if len(resolver4CorAggDetect.correlatedAggFuncs) > 0 {
+					for _, one := range resolver4CorAggDetect.correlatedAggFuncs {
+						if _, ok := resolver4AggDetect.orderByAggMap[one]; !ok {
+							resolver4AggDetect.orderByAggMap[one] = idx
 						}
 					}
-					resolver2.correlatedAggFuncs = resolver2.correlatedAggFuncs[:0]
+					resolver4CorAggDetect.correlatedAggFuncs = resolver4CorAggDetect.correlatedAggFuncs[:0]
 				}
 			}
-			// has no order-by agg.
-			if len(resolver1.orderByAggMap) == 0 {
-				return nil
-			}
 			// has some order-by agg.
-			for k, v := range resolver1.orderByAggMap {
-				var cols []*expression.Column
-				for _, arg := range k.Args {
-					newArg, np, err := b.rewrite(ctx, arg, p, nil, true)
-					if err != nil {
-						return err
+			if len(resolver4AggDetect.orderByAggMap) != 0 {
+				for k, v := range resolver4AggDetect.orderByAggMap {
+					var cols []*expression.Column
+					for _, arg := range k.Args {
+						newArg, np, err := b.rewrite(ctx, arg, p, nil, true)
+						if err != nil {
+							return err
+						}
+						p = np
+						cols = append(cols, expression.ExtractColumns(newArg)...)
 					}
-					p = np
-					cols = append(cols, expression.ExtractColumns(newArg)...)
-				}
-				// use outer logical query block correlated cols --- valid
-				if len(cols) == 0 {
-					continue
+					// use outer logical query block correlated cols --- valid
+					if len(cols) == 0 {
+						continue
+					}
+					// use logical query block cols --- invalid
+					return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1)
 				}
-				// use logical query block cols --- invalid
-				return ErrAggregateOrderNonAggQuery.GenWithStackByArgs(v + 1)
 			}
 		}
 		// we need extract all current-level-correlated columns from order by sub-query item to select fields if it has.
@@ -3416,7 +3414,8 @@ func (b *PlanBuilder) checkOrderByPart1(ctx context.Context, sel *ast.SelectStmt
 									Name:   pname.ColName,
 								}
 								sel.Fields.Fields = append(sel.Fields.Fields, &ast.SelectField{
-									Auxiliary:         true,
+									Auxiliary: true,
+									// todo: not sure here, it should be unified with correlatedAggExtractor!
 									AuxiliaryColInAgg: true,
 									Expr:              &ast.ColumnNameExpr{Name: colName},
 								})
@@ -4153,7 +4152,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 	var oldLen int
 	// According to https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html,
 	// we can only process window functions after having clause, so `considerWindow` is false now.
-	proj, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, orderMap, false, sel.OrderBy != nil)
+	proj, oldLen, err = b.buildProjection(ctx, p, sel, totalMap, nil, false, sel.OrderBy != nil)
 	p = proj
 	if err != nil {
 		return nil, err
@@ -4192,7 +4191,7 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
 		// In such case plan `p` is not changed, so we don't have to build another projection.
 		if hasWindowFuncField {
 			// Now we build the window function fields.
-			proj, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, orderMap, true, false)
+			proj, oldLen, err = b.buildProjection(ctx, p, sel, windowAggMap, windowMapper, true, false)
 			p = proj
 			if err != nil {
 				return nil, err

From 61bc37a0069048bab429e4b8f7ab8774265bcc99 Mon Sep 17 00:00:00 2001
From: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
Date: Tue, 29 Mar 2022 23:38:35 +0800
Subject: [PATCH 08/13] add addFrom commnet

Signed-off-by: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
---
 planner/funcdep/fd_graph.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go
index 243f2eb4f8043..b0f91a37f55ed 100644
--- a/planner/funcdep/fd_graph.go
+++ b/planner/funcdep/fd_graph.go
@@ -882,6 +882,11 @@ func (s FDSet) AllCols() FastIntSet {
 // AddFrom merges two FD sets by adding each FD from the given set to this set.
 // Since two different tables may have some column ID overlap, we better use
 // column unique ID to build the FDSet instead.
+//
+// AddFrom is commonly used to pull underlining FD up to current operator. Since
+// some ancillary factors for FD like hasAggBuild and GroupCols is strictly limited
+// into an exact scope --- that logical query block, but FD doesn't. So we should
+// clean that stuff when pulling FD across logical query blocks.
 func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) {
 	for i := range fds.fdEdges {
 		fd := fds.fdEdges[i]

From 19d47b0813d907a9917ca0f59bffe5f65a010556 Mon Sep 17 00:00:00 2001
From: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
Date: Tue, 29 Mar 2022 23:41:39 +0800
Subject: [PATCH 09/13] .

Signed-off-by: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
---
 planner/core/plan.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/planner/core/plan.go b/planner/core/plan.go
index 79c7615bb9a1c..0159b400088b5 100644
--- a/planner/core/plan.go
+++ b/planner/core/plan.go
@@ -387,7 +387,7 @@ func (p *baseLogicalPlan) ExtractFD() *fd.FDSet {
 		return p.fdSet
 	}
 	fds := &fd.FDSet{HashCodeToUniqueID: make(map[string]int)}
-	// isolation between different logical selection blocks.
+	// isolation between different logical query blocks.
 	acrossBlock := false
 	for _, ch := range p.children {
 		if p.SelectBlockOffset() != ch.SelectBlockOffset() {

From 34ee6d5e4b1cbfd522aceb9742b5a729ab3a212d Mon Sep 17 00:00:00 2001
From: arenatlx <arenatlx@arenatlxdeMBP.lan>
Date: Wed, 30 Mar 2022 11:15:27 +0800
Subject: [PATCH 10/13] add test

Signed-off-by: arenatlx <arenatlx@arenatlxdeMBP.lan>
---
 planner/funcdep/only_full_group_by_test.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go
index 4749b29786e97..1edd739fa9c2b 100644
--- a/planner/funcdep/only_full_group_by_test.go
+++ b/planner/funcdep/only_full_group_by_test.go
@@ -244,4 +244,12 @@ func TestOnlyFullGroupByOldCases(t *testing.T) {
 	require.NotNil(t, err)
 	require.Equal(t, err.Error(), "[executor:1242]Subquery returns more than 1 row")
 	tk.MustQuery("SELECT SUM(a) FROM t1 ORDER BY (SELECT COUNT(t2.a) FROM t1 AS t2);")
+
+	// fix issue 26945
+	tk.MustExec("drop table if exists t1,t2")
+	tk.MustExec("create table t1(a int, b int)")
+	tk.MustExec("create table t2(a int, b int)")
+	tk.MustExec("insert into t1 values(1,1)")
+	tk.MustExec("insert into t2 values(1,1)")
+	tk.MustQuery("select one.a from t1 one order by (select two.b from t2 two where two.a = one.b)").Check(testkit.Rows("1"))
 }

From 2d18cc8e0861f1b479e25e471b8929b997399242 Mon Sep 17 00:00:00 2001
From: AilinKid <314806019@qq.com>
Date: Wed, 20 Apr 2022 16:58:06 +0800
Subject: [PATCH 11/13] fix-order-by-fd2

Signed-off-by: AilinKid <314806019@qq.com>
---
 planner/core/logical_plans.go              |  19 +--
 planner/funcdep/doc.go                     | 151 +++++++++++++++++++++
 planner/funcdep/fd_graph.go                | 133 ++++++++++++------
 planner/funcdep/only_full_group_by_test.go |   1 +
 4 files changed, 252 insertions(+), 52 deletions(-)

diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index 44ae6760df5f3..1f5028f780078 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -15,7 +15,9 @@
 package core
 
 import (
+	"fmt"
 	"math"
+	"strings"
 
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/expression/aggregation"
@@ -365,6 +367,10 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres
 		outerFD.GroupByCols.Clear()
 	}
 	fds := outerFD
+
+	if strings.HasPrefix(p.ctx.GetSessionVars().StmtCtx.OriginalSQL, "select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b") {
+		fmt.Println(1)
+	}
 	fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt, innerAcrossBlock)
 	p.fdSet = fds
 	return fds
@@ -1008,19 +1014,6 @@ func (p *LogicalSelection) ExtractFD() *fd.FDSet {
 	// extract equivalence cols.
 	equivUniqueIDs := extractEquivalenceCols(p.Conditions, p.SCtx(), fds)
 
-	// after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence
-	// cols pointing to outer equivalence cols.  eg: t left join t1 on t.a = t1.b, leading a
-	// lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all
-	// left rows, once there is a null-refusing predicate on the inner side on upper layer, this
-	// can be equivalence again. (the outer rows left are all coming from equal matching)
-	//
-	// why not just makeNotNull of them, because even a non-equiv-related inner col can also
-	// refuse supplied null values.
-	if fds.Rule333Equiv.InnerCols.Len() != 0 && notnullColsUniqueIDs.Intersects(fds.Rule333Equiv.InnerCols) {
-		// restore/re-strength FDs from rule 333
-		fds.MakeRestoreRule333()
-	}
-
 	// apply operator's characteristic's FD setting.
 	fds.MakeNotNull(notnullColsUniqueIDs)
 	fds.AddConstants(constUniqueIDs)
diff --git a/planner/funcdep/doc.go b/planner/funcdep/doc.go
index a70399c7a1b1b..9c97ac781888c 100644
--- a/planner/funcdep/doc.go
+++ b/planner/funcdep/doc.go
@@ -10,3 +10,154 @@ package funcdep
 //   https://cs.uwaterloo.ca/research/tr/2000/11/CS-2000-11.thesis.pdf
 
 // TODO: Add the RFC design.
+
+// NOTE 1.
+// when handling Lax FD, we don't care the null value in the dependency, which means
+// as long as null-attribute coverage of the determinant can make a Lax FD as strict one.
+
+// The definition of "lax" used in the paper differs from the definition used by this
+// library. For a lax dependency A~~>B, the paper allows this set of rows:
+//
+//	a  b
+//	-------
+//	1  1
+//	1  NULL
+//
+//	This alternate definition is briefly covered in section 2.5.3.2 of the paper (see definition
+//	2.19). The reason for this change is to allow a lax dependency to be upgraded to a strict
+//	dependency more readily, needing only the determinant columns to be not-null rather than
+//  both determinant and dependant columns.
+//
+// This is on the condition that, for definite values of determinant of a Lax FD, it won't
+// have two same definite dependant value. That's true, because there is no way can derive
+// to this kind of FD.
+//
+// Even in our implementation of outer join, the only way to produce duplicate definite
+// determinant is the join predicate. But for now, we only maintain the equivalence and
+// some strict FD of it.
+//
+//   t(a,b) left join t1(c,d,e) on t.a = t1.c and b=1
+//  a  b  |  c     d     e
+//  ------+----------------
+//  1  1  |  1    NULL   1
+//  1  2  | NULL  NULL  NULL
+//  2  1  | NULL  NULL  NULL
+//
+// Actually it's possible, the lax FD {a} -> {c} can be derived but not that useful. we only
+// maintain the {c} ~> {a} for existence after outer join. Besides, there two Cond-FD should
+// be preserved waiting for be visible again once with the null-reject on the condition of
+// null constraint columns. (see below)
+//
+// NOTE 2.
+// When handle outer join, it won't produce lax FD with duplicate definite determinant values and
+// different dependency values.
+//
+// In implementation，we come across some lax FD dependent on null-reject of some other cols. For
+// example.
+//   t(a,b) left join t1(c,d,e) on t.a = t1.c and b=1
+//  a  b  |  c     d     e
+//  ------+----------------
+//  1  1  |  1    NULL   1
+//  1  2  | NULL  NULL  NULL
+//  2  1  | NULL  NULL  NULL
+//
+// here constant FD {} -> {b} won't be existed after the outer join is done. Notice null-constraint
+// {c,d,e} -| {c,d,e}, this FD should be preserved and will be visible again when some null-reject
+// predicate take effect on the null-constraint cols.
+//
+// It's same for strict equivalence {t.a} = {t1.c}. Notice there are no lax equivalence here, because
+// left side couldn't be guaranteed to be definite or null. like a=2 here. Let's collect all of this
+// on-condition FD down, correspondent with a null-constraints column set, name it as Cond-FD.
+//
+// lax equivalencies are theoretically possible, but it won't be constructed from an outer join unless
+// t already has a constant FD in column `a` here before outer join take a run. So the lax equivalence
+// has some pre-conditions as you see, and it couldn't cover the case shown above. Let us do it like a
+// Cond-FD does.
+//
+// The FD constructed from the join predicate should be considered as Cond-FD. Here like equivalence of
+// {a} == {c} and constant FD {b} = 1 (if the join condition is e=1, it's here too). We can say that for
+// every matched row, this FDs is valid, while for the other rows, the inner side are supplied of null
+// rows. So this FDs are stored as ncEdges with nc condition of all inner table cols.
+//
+// We introduced invisible FD with null-constraint column to solve the problem above named as Cond-FD.
+// For multi embedded left join, we take the following case as an example.
+//    a,b         c,d,e
+// 	-----------+-----------
+//   1    2    |    1  1  1
+// 	 2    2    |
+//  -----------+-----------
+//
+//  left join on (a=c) res:
+//   a   b    c     e     e
+//  -------------------------
+//   1   2    1     1     1
+//   2   2 +- null null  null -+
+//         |                   |
+//         +-------------------+
+//                              \
+//                               \
+//  the Cond-FD are < a=c with {c,d,e} > the latter is as null constraint cols
+//
+//    e,f
+//  -----------------------
+//   1   2
+//   2   2
+//   3   3
+//  -----------------------
+//
+//  left join on (e=a) res:
+//   e   f    a     b      c    d    e
+//  -----------------------------------
+//   1   2    1     2      1    1    1
+//   2   2    2     2  +- null null null --+---------------> Cond-FD are <a=c with {c,d,e}> still exists.
+//   3   3 +-null null |  null null null   |---+
+//         |           +-------------------+   |
+//         +-----------------------------------+-----------> New Cond-FD are <e=a with {a,b,c,d,e}> occurs.
+//
+//
+// the old Cond-FD with null constraint columns set {c,d,e} is preserved cause new append cols are all null too.
+// the new Cond-FD with null constraint columns set {a,b,c,d,e} are also meaningful, even if the null-reject column
+// is one of {c,d,e} which may reduce one of the matched row out of the result, the equivalence {a}={e} still exist.
+//
+// Provide that the result of the first left join is like:
+//  left join on (a=c) res:
+//   a     b    c     e     e
+//  ---------------------------
+//   1     2    1     1     1
+//   null  2  null  null  null
+//
+//  THEN: left join on (e=a) res:
+//   e   f    a     b     c    d    e
+//  ---------------------------------
+//   1   2    1     2     1    1    1
+//   2   2    null null null null null
+//   3   3    3     3   null null null
+//
+//  Even like that, the case of old Cond-FD and new Cond-FD are existed too. Seems the null-constraint column set of
+//  old Cond-FD {c,d,e} can be expanded as {a,b,c,d,e} visually, but we couldn't derive the inference of the join predicate
+//  (e=a). The null-reject of column `a` couldn't bring the visibility to the old Cond-FD theoretically, it just happened
+//  to refuse that row with a null value in column a.
+//
+// Think about adding one more row in first left join result.
+//
+//  left join on (a=c) res:
+//   a     b    c     e     e
+//  ---------------------------
+//   1     2    1     1     1
+//   null  2  null  null  null
+//   3     3  null  null  null
+//
+//  THEN: left join on (e=a) res:
+//   e   f    a     b     c    d    e
+//  ---------------------------------
+//   1   2    1     2     1    1    1
+//   2   2    null null null null null
+//   3   3    3     3   null null null
+//
+//  Conclusion:
+//  As you see that's right we couldn't derive the inference of the join predicate (e=a) to expand old Cond-FD's nc
+//  {c,d,e} as {a,b,c,d,e}. So the rule for Cond-FD is quite simple, just keep the old ncEdge from right, appending
+//  the new ncEdges in current left join.
+//
+//  If the first left join result is in the outer side of the second left join, just keep the ncEdge from left as well,
+//  appending the new ncEdges in current left join.
diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go
index b0f91a37f55ed..51bdf95bb43a4 100644
--- a/planner/funcdep/fd_graph.go
+++ b/planner/funcdep/fd_graph.go
@@ -29,14 +29,48 @@ type fdEdge struct {
 	// The value of the strict and eq bool forms the four kind of edges:
 	// functional dependency, lax functional dependency, strict equivalence constraint, lax equivalence constraint.
 	// And if there's a functional dependency `const` -> `column` exists. We would let the from side be empty.
+	// Adjustment: when strict is true and equiv is false, it means the edge is a Lax equivalence; when both true,
+	// it means the original Strict Equivalence.
+	// LAX EQ: (must have the exact same number of columns in each side if a lax EQ)
+	// {A} ~= {C} should be strengthened as {A} == {C} only with AC as definite.
+	// {A} ~= {C} & {C} ~= {D} unless C is definite, we won't get {A} ~= {D}. eg: {1} ~= {null} ~= {2}
+	// {AB} ~= {CD} won't derive to {A} ~= {C}, the opposite side is either, eg: {1,null} ~= {2,null} & {1,2} !(~=) {1,3}
 	strict bool
 	equiv  bool
+
+	// FD with non-nil conditionNC is hidden in FDSet, it will be visible again when at least one null-reject column in conditionNC.
+	// conditionNC should be satisfied before some FD make vision again, it's quite like lax FD to be strengthened as strict
+	// one. But the constraints should take effect on specified columns from conditionNC rather than just determinant columns.
+	conditionNC *FastIntSet
+}
+
+// ncEdge is quite simple for remarking the null value relationship between cols, storing it as fdEdge will add complexity of traverse of a closure.
+type ncEdge struct {
+	// null constraints = determinants -> dependencies
+	// determinants = from
+	// dependencies = to
+	// when the `from` side is null, the `to` side must be null as well.
+	//  -------------------------------
+	//   e   f   a     b   c    d    e
+	//   1   2   1     2   1    1    1
+	//   2   2  null   2  null null null
+	//   3   3  null null null null null
+	// {b} -| {a,c,d,e}
+	// {a,c,d,e} -| {a,c,d,e}
+	from FastIntSet
+	to   FastIntSet
 }
 
 // FDSet is the main portal of functional dependency, it stores the relationship between (extended table / physical table)'s
 // columns. For more theory about this design, ref the head comments in the funcdep/doc.go.
 type FDSet struct {
 	fdEdges []*fdEdge
+	// after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence
+	// cols pointing to outer equivalence cols.  eg: t left join t1 on t.a = t1.b, leading a
+	// lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all
+	// left rows, once there is a null-refusing predicate on the inner side on upper layer, this
+	// can be equivalence again. (the outer rows left are all coming from equal matching)
+	ncEdges []*fdEdge
 	// NotNullCols is used to record the columns with not-null attributes applied.
 	// eg: {1} ~~> {2,3}, when {2,3} not null is applied, it actually does nothing.
 	// but we should record {2,3} as not-null down for the convenience of transferring
@@ -49,19 +83,7 @@ type FDSet struct {
 	// GroupByCols is used to record columns / expressions that under the group by phrase.
 	GroupByCols FastIntSet
 	HasAggBuilt bool
-	// after left join, according to rule 3.3.3, it may create a lax FD from inner equivalence
-	// cols pointing to outer equivalence cols.  eg: t left join t1 on t.a = t1.b, leading a
-	// lax FD from t1.b ~> t.a, this lax attribute is coming from supplied null value to all
-	// left rows, once there is a null-refusing predicate on the inner side on upper layer, this
-	// can be equivalence again. (the outer rows left are all coming from equal matching)
-	//
-	// why not just makeNotNull of them, because even a non-equiv-related inner col can also
-	// refuse supplied null values.
 	// todo: when multi join and across select block, this may need to be maintained more precisely.
-	Rule333Equiv struct {
-		Edges     []*fdEdge
-		InnerCols FastIntSet
-	}
 }
 
 // ClosureOfStrict is exported for outer usage.
@@ -215,6 +237,19 @@ func (s *FDSet) AddLaxFunctionalDependency(from, to FastIntSet) {
 	s.addFunctionalDependency(from, to, false, false)
 }
 
+func (s *FDSet) AddNCFunctionalDependency(from, to, nc FastIntSet, strict, equiv bool) {
+	// Since nc edge is invisible by now, just collecting them together simply, once the
+	// null-reject on nc cols is satisfied, let's pick them out and insert into the fdEdge
+	// normally.
+	s.ncEdges = append(s.ncEdges, &fdEdge{
+		from:        from,
+		to:          to,
+		strict:      strict,
+		equiv:       equiv,
+		conditionNC: &nc,
+	})
+}
+
 // addFunctionalDependency will add strict/lax functional dependency to the fdGraph.
 // eg:
 // CREATE TABLE t (a int key, b int, c int, d int, e int, UNIQUE (b,c))
@@ -287,6 +322,7 @@ func (s *FDSet) addFunctionalDependency(from, to FastIntSet, strict, equiv bool)
 // implies is used to shrink the edge size, keeping the minimum of the functional dependency set size.
 func (e *fdEdge) implies(otherEdge *fdEdge) bool {
 	// The given one's from should be larger than the current one and the current one's to should be larger than the given one.
+	// ***************************** IMPLY IN SAME TYPE*********************************************
 	// STRICT FD:
 	// A --> C is stronger than AB --> C. --- YES
 	// A --> BC is stronger than A --> C. --- YES
@@ -294,6 +330,19 @@ func (e *fdEdge) implies(otherEdge *fdEdge) bool {
 	// LAX FD:
 	// 1: A ~~> C is stronger than AB ~~> C. --- YES
 	// 2: A ~~> BC is stronger than A ~~> C. --- NO
+	//
+	// STRICT EQ:
+	// since {superset} == {superset} won't collapse with each other. --- NO
+	//
+	// LAX EQ:
+	// 1: {A} ~= {C} is stronger than {AB} ~= {CD}. --- NO
+	// 2: {AB} ~= {CD} is stronger than {A} ~= {C}. --- NO
+	//
+	// ***************************** IMPLY IN DIFF TYPE*********************************************
+	// 1: {A} == {B} is stronger than {A} ~= {B}, {A} -> {B}, {A} ~> {B}
+	// 2: {A} ~= {B} is stronger than {A} ~> {B}
+	// 3: {A} -> {B} is stronger than {A} ~> {B}
+	//
 	// The precondition for 2 to be strict FD is much easier to satisfied than 1, only to
 	// need {a,c} is not null. So we couldn't merge this two to be one lax FD.
 	// but for strict/equiv FD implies lax FD, 1 & 2 is implied both reasonably.
@@ -425,6 +474,7 @@ func (s *FDSet) AddConstants(cons FastIntSet) {
 					shouldRemoved = true
 				}
 			}
+			// pre-condition NOTE 1 in doc.go, it won't occur duplicate definite determinant of Lax FD.
 			// for strict or lax FDs, both can reduce the dependencies side columns with constant closure.
 			if fd.removeColumnsToSide(cols) {
 				shouldRemoved = true
@@ -507,6 +557,29 @@ func (s *FDSet) EquivalenceCols() (eqs []*FastIntSet) {
 func (s *FDSet) MakeNotNull(notNullCols FastIntSet) {
 	notNullCols.UnionWith(s.NotNullCols)
 	notNullColsSet := s.closureOfEquivalence(notNullCols)
+	// make nc FD visible.
+	for i := 0; i < len(s.ncEdges); i++ {
+		fd := s.ncEdges[i]
+		if fd.conditionNC.Intersects(notNullColsSet) {
+			// condition satisfied.
+			s.ncEdges = append(s.ncEdges[:i], s.ncEdges[i+1:]...)
+			i--
+			if fd.isConstant() {
+				s.AddConstants(fd.to)
+			} else if fd.equiv {
+				s.AddEquivalence(fd.from, fd.to)
+				newNotNullColsSet := s.closureOfEquivalence(notNullColsSet)
+				if !newNotNullColsSet.Difference(notNullColsSet).IsEmpty() {
+					notNullColsSet = newNotNullColsSet
+					// expand not-null set.
+					i = -1
+				}
+			} else {
+				s.addFunctionalDependency(fd.from, fd.to, fd.strict, fd.equiv)
+			}
+		}
+	}
+	// make origin FD strengthened.
 	for i := 0; i < len(s.fdEdges); i++ {
 		fd := s.fdEdges[i]
 		if fd.strict {
@@ -711,18 +784,15 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F
 			s.addFunctionalDependency(edge.from, edge.to, false, edge.equiv)
 		}
 	}
+	for _, edge := range innerFDs.ncEdges {
+		s.ncEdges = append(s.ncEdges, edge)
+	}
 	leftCombinedFDFrom := NewFastIntSet()
 	leftCombinedFDTo := NewFastIntSet()
 	for _, edge := range filterFDs.fdEdges {
 		// Rule #3.2, constant FD are removed from right side of left join.
 		if edge.isConstant() {
-			s.Rule333Equiv.Edges = append(s.Rule333Equiv.Edges, &fdEdge{
-				from:   edge.from,
-				to:     edge.to,
-				strict: edge.strict,
-				equiv:  edge.equiv,
-			})
-			s.Rule333Equiv.InnerCols = innerCols
+			s.AddNCFunctionalDependency(edge.from, edge.to, innerCols, edge.strict, edge.equiv)
 			continue
 		}
 		// Rule #3.3, we only keep the lax FD from right side pointing the left side.
@@ -764,13 +834,7 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F
 					s.addFunctionalDependency(NewFastIntSet(i), NewFastIntSet(j), false, false)
 				}
 			}
-			s.Rule333Equiv.Edges = append(s.Rule333Equiv.Edges, &fdEdge{
-				from:   laxFDFrom,
-				to:     laxFDTo,
-				strict: true,
-				equiv:  true,
-			})
-			s.Rule333Equiv.InnerCols = innerCols
+			s.AddNCFunctionalDependency(equivColsLeft, equivColsRight, innerCols, true, true)
 		}
 		// Rule #3.1, filters won't produce any strict/lax FDs.
 	}
@@ -825,18 +889,6 @@ func (s *FDSet) MakeOuterJoin(innerFDs, filterFDs *FDSet, outerCols, innerCols F
 	}
 }
 
-func (s *FDSet) MakeRestoreRule333() {
-	for _, eg := range s.Rule333Equiv.Edges {
-		if eg.isConstant() {
-			s.AddConstants(eg.to)
-		} else {
-			s.AddEquivalence(eg.from, eg.to)
-		}
-	}
-	s.Rule333Equiv.Edges = nil
-	s.Rule333Equiv.InnerCols.Clear()
-}
-
 type ArgOpts struct {
 	SkipFDRule331   bool
 	TypeFDRule331   TypeFilterFD331
@@ -900,6 +952,10 @@ func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) {
 			s.AddLaxFunctionalDependency(fd.from, fd.to)
 		}
 	}
+	for i := range fds.ncEdges {
+		fd := fds.ncEdges[i]
+		s.ncEdges = append(s.ncEdges, fd)
+	}
 	s.NotNullCols.UnionWith(fds.NotNullCols)
 	if s.HashCodeToUniqueID == nil {
 		s.HashCodeToUniqueID = fds.HashCodeToUniqueID
@@ -917,7 +973,6 @@ func (s *FDSet) AddFrom(fds *FDSet, acrossBlock bool) {
 		}
 		s.HasAggBuilt = fds.HasAggBuilt
 	}
-	s.Rule333Equiv = fds.Rule333Equiv
 }
 
 // MaxOneRow will regard every column in the fdSet as a constant. Since constant is stronger that strict FD, it will
diff --git a/planner/funcdep/only_full_group_by_test.go b/planner/funcdep/only_full_group_by_test.go
index 1edd739fa9c2b..65367fe36d76f 100644
--- a/planner/funcdep/only_full_group_by_test.go
+++ b/planner/funcdep/only_full_group_by_test.go
@@ -153,6 +153,7 @@ func TestOnlyFullGroupByOldCases(t *testing.T) {
 	// classic cases
 	tk.MustQuery("select customer1.a, count(*) from customer1 left join customer2 on customer1.a=customer2.b where customer2.pk in (7,9) group by customer2.b;")
 	tk.MustQuery("select customer1.a, count(*) from customer1 left join customer2 on customer1.a=1 where customer2.pk in (7,9) group by customer2.b;")
+	tk.MustQuery("select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b;")
 	tk.MustExec("drop view if exists customer")
 	// this left join can extend left pk to all cols.
 	tk.MustExec("CREATE algorithm=merge definer='root'@'localhost' VIEW customer as SELECT pk,a,b FROM customer1 LEFT JOIN customer2 USING (pk);")

From a03a85397a36704816c710b3be95205820894baf Mon Sep 17 00:00:00 2001
From: AilinKid <314806019@qq.com>
Date: Wed, 20 Apr 2022 17:51:56 +0800
Subject: [PATCH 12/13] fix the refine test

Signed-off-by: AilinKid <314806019@qq.com>
---
 planner/core/logical_plans.go | 8 +-------
 planner/funcdep/fd_graph.go   | 9 ++++++---
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go
index 1f5028f780078..fe3ec4beeb8d8 100644
--- a/planner/core/logical_plans.go
+++ b/planner/core/logical_plans.go
@@ -15,10 +15,6 @@
 package core
 
 import (
-	"fmt"
-	"math"
-	"strings"
-
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/expression/aggregation"
 	"github.com/pingcap/tidb/infoschema"
@@ -36,6 +32,7 @@ import (
 	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tidb/util/ranger"
 	"go.uber.org/zap"
+	"math"
 )
 
 var (
@@ -368,9 +365,6 @@ func (p *LogicalJoin) extractFDForOuterJoin(filtersFromApply []expression.Expres
 	}
 	fds := outerFD
 
-	if strings.HasPrefix(p.ctx.GetSessionVars().StmtCtx.OriginalSQL, "select c1.a, count(*) from customer2 c3 left join (customer1 c1 left join customer2 c2 on c1.a=c2.b) on c3.b=c1.a where c2.pk in (7,9) group by c2.b") {
-		fmt.Println(1)
-	}
 	fds.MakeOuterJoin(innerFD, filterFD, outerCols, innerCols, &opt, innerAcrossBlock)
 	p.fdSet = fds
 	return fds
diff --git a/planner/funcdep/fd_graph.go b/planner/funcdep/fd_graph.go
index 51bdf95bb43a4..4ac9c8d484512 100644
--- a/planner/funcdep/fd_graph.go
+++ b/planner/funcdep/fd_graph.go
@@ -1107,9 +1107,12 @@ func (s *FDSet) ProjectCols(cols FastIntSet) {
 					continue
 				}
 			}
-			if fd.removeColumnsToSide(fd.from) {
-				// fd.to side is empty, remove this FD.
-				continue
+			// from and to side of equiv are same, don't do trivial elimination.
+			if !fd.isEquivalence() {
+				if fd.removeColumnsToSide(fd.from) {
+					// fd.to side is empty, remove this FD.
+					continue
+				}
 			}
 		}
 

From b3b600e3a3f17ee168069c68fcadd557ff2f57dd Mon Sep 17 00:00:00 2001
From: Arenatlx <ailinsilence4@gmail.com>
Date: Mon, 25 Apr 2022 15:25:01 +0800
Subject: [PATCH 13/13] Update planner/core/logical_plan_builder.go

Co-authored-by: Yiding Cui <winoros@gmail.com>
---
 planner/core/logical_plan_builder.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index 9321309bdf523..3591973079a8e 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1942,7 +1942,7 @@ func (b *PlanBuilder) buildSortCheck(ctx context.Context, p LogicalPlan, sel *as
 			if sel.Distinct {
 				// order-by with distinct case
 				// Rule #1: order by item should be in the select filed list
-				// Rule #2: the base col that order by item dependent on should be in the select field list
+				// Rule #2: the base col that order by item is dependent on should be in the select field list
 				for offset, odrItem := range orderByItemExprs {
 					item := fd.NewFastIntSet()
 					switch x := odrItem.Expr.(type) {