Skip to content

Commit

Permalink
Add additional test coverage for aggregaes using dates/times/timestam…
Browse files Browse the repository at this point in the history
…ps/decimals (apache#6939)

* Add additional test coverage for aggregaes using dates/times/timestamps/decimals

* Add coverage for date32/date64
  • Loading branch information
alamb committed Jul 13, 2023
1 parent 54f96e6 commit d96dfa2
Showing 1 changed file with 194 additions and 38 deletions.
232 changes: 194 additions & 38 deletions datafusion/core/tests/sqllogictests/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ WITH HEADER ROW
LOCATION '../../testing/data/csv/aggregate_test_100.csv'

statement ok
CREATE TABLE d_table (c1 decimal(10,3)) as values
(110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009),
(-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009)
CREATE TABLE d_table (c1 decimal(10,3), c2 varchar)
as values
(110.000, 'A'), (110.001, 'A'), (110.002, 'A'), (110.003, 'A'), (110.004, 'A'), (110.005, 'A'), (110.006, 'A'), (110.007, 'A'), (110.008, 'A'), (110.009, 'A'),
(-100.000, 'B'),(-100.001, 'B'),(-100.002, 'B'),(-100.003, 'B'),(-100.004, 'B'),(-100.005, 'B'),(-100.006, 'B'),(-100.007, 'B'),(-100.008, 'B'),(-100.009, 'B')

statement ok
CREATE TABLE median_table (
Expand Down Expand Up @@ -448,7 +449,7 @@ drop table cpu;

# this test is to show create table as and select into works in the same way
statement ok
SELECT * INTO cpu
SELECT * INTO cpu
FROM (VALUES
('host0', 90.1),
('host1', 90.2),
Expand Down Expand Up @@ -1483,22 +1484,6 @@ NULL 2
statement ok
drop table the_nulls;

# All supported timestamp types

# "nanos" --> TimestampNanosecondArray
# "micros" --> TimestampMicrosecondArray
# "millis" --> TimestampMillisecondArray
# "secs" --> TimestampSecondArray
# "names" --> StringArray

statement ok
create table t_source
as values
('2018-11-13T17:11:10.011375885995', 'Row 0'),
('2011-12-13T11:13:10.12345', 'Row 1'),
(null, 'Row 2'),
('2021-01-01T05:11:10.432', 'Row 3');

statement ok
create table bit_aggregate_functions (
c1 SMALLINT NOT NULL,
Expand Down Expand Up @@ -1568,62 +1553,196 @@ SELECT bool_or(distinct c1), bool_or(distinct c2), bool_or(distinct c3), bool_or
----
true true true false true true false NULL

# All supported timestamp types

# "nanos" --> TimestampNanosecondArray
# "micros" --> TimestampMicrosecondArray
# "millis" --> TimestampMillisecondArray
# "secs" --> TimestampSecondArray
# "names" --> StringArray

statement ok
create table t_source
as values
('2018-11-13T17:11:10.011375885995', 'Row 0', 'X'),
('2011-12-13T11:13:10.12345', 'Row 1', 'X'),
(null, 'Row 2', 'Y'),
('2021-01-01T05:11:10.432', 'Row 3', 'Y');

statement ok
create table t as
select
arrow_cast(column1, 'Timestamp(Nanosecond, None)') as nanos,
arrow_cast(column1, 'Timestamp(Microsecond, None)') as micros,
arrow_cast(column1, 'Timestamp(Millisecond, None)') as millis,
arrow_cast(column1, 'Timestamp(Second, None)') as secs,
column2 as names
column2 as names,
column3 as tag
from t_source;

# Demonstate the contents
query PPPPT
query PPPPTT
select * from t;
----
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1
NULL NULL NULL NULL Row 2
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 X
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1 X
NULL NULL NULL NULL Row 2 Y
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 Y


# aggregate_timestamps_sum
statement error Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\.
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t;

statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\.
SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag;

# aggregate_timestamps_count
query IIII
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t;
----
3 3 3 3

query TIIII
SELECT tag, count(nanos), count(micros), count(millis), count(secs) FROM t GROUP BY tag ORDER BY tag;
----
X 2 2 2 2
Y 1 1 1 1

# aggregate_timestamps_min
query PPPP
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t;
----
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10

query TPPPP
SELECT tag, min(nanos), min(micros), min(millis), min(secs) FROM t GROUP BY tag ORDER BY tag;
----
X 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10
Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10

# aggregate_timestamps_max
query PPPP
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t;
----
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10

query TPPPP
SELECT tag, max(nanos), max(micros), max(millis), max(secs) FROM t GROUP BY tag ORDER BY tag
----
X 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10
Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10


# aggregate_timestamps_avg
statement error Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\).
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\.
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t

statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\.
SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag;


statement ok
drop table t_source;

statement ok
drop table t;


# All supported Date tpes

# "date32" --> Date32Array
# "date64" --> Date64Array
# "names" --> StringArray

statement ok
create table t_source
as values
('2018-11-13', 'Row 0', 'X'),
('2011-12-13', 'Row 1', 'X'),
(null, 'Row 2', 'Y'),
('2021-01-01', 'Row 3', 'Y');

statement ok
create table t as
select
arrow_cast(column1, 'Date32') as date32,
-- Workaround https://github.com/apache/arrow-rs/issues/4512 is fixed, can use this
-- arrow_cast(column1, 'Date64') as date64,
arrow_cast(arrow_cast(column1, 'Date32'), 'Date64') as date64,
column2 as names,
column3 as tag
from t_source;

# Demonstate the contents
query DDTT
select * from t;
----
2018-11-13 2018-11-13T00:00:00 Row 0 X
2011-12-13 2011-12-13T00:00:00 Row 1 X
NULL NULL Row 2 Y
2021-01-01 2021-01-01T00:00:00 Row 3 Y


# aggregate_timestamps_sum
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\.
SELECT sum(date32), sum(date64) FROM t;

statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\.
SELECT tag, sum(date32), sum(date64) FROM t GROUP BY tag ORDER BY tag;

# aggregate_timestamps_count
query II
SELECT count(date32), count(date64) FROM t;
----
3 3

query TII
SELECT tag, count(date32), count(date64) FROM t GROUP BY tag ORDER BY tag;
----
X 2 2
Y 1 1

# aggregate_timestamps_min
query DD
SELECT min(date32), min(date64) FROM t;
----
2011-12-13 2011-12-13T00:00:00

query TDD
SELECT tag, min(date32), min(date64) FROM t GROUP BY tag ORDER BY tag;
----
X 2011-12-13 2011-12-13T00:00:00
Y 2021-01-01 2021-01-01T00:00:00

# aggregate_timestamps_max
query DD
SELECT max(date32), max(date64) FROM t;
----
2021-01-01 2021-01-01T00:00:00

query TDD
SELECT tag, max(date32), max(date64) FROM t GROUP BY tag ORDER BY tag
----
X 2018-11-13 2018-11-13T00:00:00
Y 2021-01-01 2021-01-01T00:00:00


# aggregate_timestamps_avg
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\.
SELECT avg(date32), avg(date64) FROM t

statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\.
SELECT tag, avg(date32), avg(date64) FROM t GROUP BY tag ORDER BY tag;


statement ok
drop table t_source;

statement ok
drop table t;


# All supported time types

# Columns are named:
Expand All @@ -1636,10 +1755,10 @@ drop table t;
statement ok
create table t_source
as values
('18:06:30.243620451', 'Row 0'),
('20:08:28.161121654', 'Row 1'),
('19:11:04.156423842', 'Row 2'),
('21:06:28.247821084', 'Row 3');
('18:06:30.243620451', 'Row 0', 'A'),
('20:08:28.161121654', 'Row 1', 'A'),
('19:11:04.156423842', 'Row 2', 'B'),
('21:06:28.247821084', 'Row 3', 'B');


statement ok
Expand All @@ -1649,46 +1768,71 @@ select
arrow_cast(column1, 'Time64(Microsecond)') as micros,
arrow_cast(column1, 'Time32(Millisecond)') as millis,
arrow_cast(column1, 'Time32(Second)') as secs,
column2 as names
column2 as names,
column3 as tag
from t_source;

# Demonstate the contents
query DDDDT
query DDDDTT
select * from t;
----
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30 Row 0
20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28 Row 1
19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04 Row 2
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30 Row 0 A
20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28 Row 1 A
19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04 Row 2 B
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3 B

# aggregate_times_sum
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\).
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t

statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\)\.
SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag

# aggregate_times_count
query IIII
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t
----
4 4 4 4

query TIIII
SELECT tag, count(nanos), count(micros), count(millis), count(secs) FROM t GROUP BY tag ORDER BY tag
----
A 2 2 2 2
B 2 2 2 2


# aggregate_times_min
query DDDD
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t
----
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30

query TDDDD
SELECT tag, min(nanos), min(micros), min(millis), min(secs) FROM t GROUP BY tag ORDER BY tag
----
A 18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30
B 19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04

# aggregate_times_max
query DDDD
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t
----
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28

query TDDDD
SELECT tag, max(nanos), max(micros), max(millis), max(secs) FROM t GROUP BY tag ORDER BY tag
----
A 20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28
B 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28


# aggregate_times_avg
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\).
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t

statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\)\.
SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag;

statement ok
drop table t_source;

Expand All @@ -1710,13 +1854,25 @@ select sum(c1), arrow_typeof(sum(c1)) from d_table;
----
100 Decimal128(20, 3)

query TRT
select c2, sum(c1), arrow_typeof(sum(c1)) from d_table GROUP BY c2 ORDER BY c2;
----
A 1100.045 Decimal128(20, 3)
B -1000.045 Decimal128(20, 3)


# aggregate_decimal_avg
query RT
select avg(c1), arrow_typeof(avg(c1)) from d_table
----
5 Decimal128(14, 7)

query TRT
select c2, avg(c1), arrow_typeof(avg(c1)) from d_table GROUP BY c2 ORDER BY c2
----
A 110.0045 Decimal128(14, 7)
B -100.0045 Decimal128(14, 7)

# Use PostgresSQL dialect
statement ok
set datafusion.sql_parser.dialect = 'Postgres';
Expand Down

0 comments on commit d96dfa2

Please sign in to comment.