Skip to content

Commit

Permalink
fix nan bug
Browse files Browse the repository at this point in the history
  • Loading branch information
akurmustafa committed Nov 15, 2024
1 parent e25f5e7 commit 336bca1
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 4 deletions.
17 changes: 13 additions & 4 deletions datafusion/functions-aggregate/src/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use datafusion_common::{
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
use datafusion_physical_expr::expressions;
use std::cmp::Ordering;
use std::fmt::Debug;

use arrow::datatypes::i256;
Expand Down Expand Up @@ -113,8 +114,12 @@ macro_rules! primitive_max_accumulator {
($DATA_TYPE:ident, $NATIVE:ident, $PRIMTYPE:ident) => {{
Ok(Box::new(
PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new($DATA_TYPE, |cur, new| {
if *cur < new {
*cur = new
match (new).partial_cmp(cur) {
None | Some(Ordering::Greater) => {
// new is NaN or greater
*cur = new
}
_ => {}
}
})
// Initialize each accumulator to $NATIVE::MIN
Expand All @@ -132,8 +137,12 @@ macro_rules! primitive_min_accumulator {
($DATA_TYPE:ident, $NATIVE:ident, $PRIMTYPE:ident) => {{
Ok(Box::new(
PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new(&$DATA_TYPE, |cur, new| {
if *cur > new {
*cur = new
match (new).partial_cmp(cur) {
None | Some(Ordering::Less) => {
// new is NaN or Less
*cur = new
}
_ => {}
}
})
// Initialize each accumulator to $NATIVE::MAX
Expand Down
16 changes: 16 additions & 0 deletions datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5271,3 +5271,19 @@ drop view t

statement ok
drop table source;

# Test whether min, max accumulator produces NaN result when input is NaN.
# See https://github.com/apache/datafusion/issues/13415 for rationale
statement ok
CREATE TABLE input_table (
"row" integer,
"x" double precision
);

statement ok
INSERT INTO input_table VALUES (1, 'NaN');

query RR
SELECT max(input_table.x), min(input_table.x) from input_table GROUP BY input_table."row";
----
NaN NaN

0 comments on commit 336bca1

Please sign in to comment.