Skip to content

Commit

Permalink
[SPARK-29343][SQL][FOLLOW-UP] Remove floating-point Sum/Average/Centr…
Browse files Browse the repository at this point in the history
…alMomentAgg from order-insensitive aggregates

### What changes were proposed in this pull request?

This pr is to remove floating-point `Sum/Average/CentralMomentAgg` from order-insensitive aggregates in `EliminateSorts`.

This pr comes from the gatorsmile suggestion: apache#26011 (comment)

### Why are the changes needed?

Bug fix.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Added tests in `SubquerySuite`.

Closes apache#26534 from maropu/SPARK-29343-FOLLOWUP.

Authored-by: Takeshi Yamamuro <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
maropu authored and dongjoon-hyun committed Nov 16, 2019
1 parent 16e7195 commit 6d6b233
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1002,12 +1002,11 @@ object EliminateSorts extends Rule[LogicalPlan] {

private def isOrderIrrelevantAggs(aggs: Seq[NamedExpression]): Boolean = {
def isOrderIrrelevantAggFunction(func: AggregateFunction): Boolean = func match {
case _: Sum => true
case _: Min => true
case _: Max => true
case _: Count => true
case _: Average => true
case _: CentralMomentAgg => true
case _: Min | _: Max | _: Count => true
// Arithmetic operations for floating-point values are order-sensitive
// (they are not associative).
case _: Sum | _: Average | _: CentralMomentAgg =>
!Seq(FloatType, DoubleType).exists(_.sameType(func.children.head.dataType))
case _ => false
}

Expand Down
17 changes: 17 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,23 @@ class SubquerySuite extends QueryTest with SharedSparkSession {
}
}

test("Cannot remove sort for floating-point order-sensitive aggregates from subquery") {
Seq("float", "double").foreach { typeName =>
Seq("SUM", "AVG", "KURTOSIS", "SKEWNESS", "STDDEV_POP", "STDDEV_SAMP",
"VAR_POP", "VAR_SAMP").foreach { aggName =>
val query =
s"""
|SELECT k, $aggName(v) FROM (
| SELECT k, v
| FROM VALUES (1, $typeName(2.0)), (2, $typeName(1.0)) t(k, v)
| ORDER BY v)
|GROUP BY k
""".stripMargin
assert(getNumSortsInQuery(query) == 1)
}
}
}

test("SPARK-25482: Forbid pushdown to datasources of filters containing subqueries") {
withTempView("t1", "t2") {
sql("create temporary view t1(a int) using parquet")
Expand Down

0 comments on commit 6d6b233

Please sign in to comment.