Skip to content

Commit

Permalink
fixup! refactor!: default to nanoseconds
Browse files Browse the repository at this point in the history
  • Loading branch information
varshith257 committed Jan 13, 2025
1 parent 3b5d472 commit 04a1c60
Show file tree
Hide file tree
Showing 25 changed files with 147 additions and 289 deletions.
31 changes: 7 additions & 24 deletions crates/proof-of-sql-parser/src/posql_time/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,8 @@ impl PoSQLTimestamp {

let offset_seconds = dt.offset().local_minus_utc();
let timezone = PoSQLTimeZone::new(offset_seconds);
let nanoseconds = dt.timestamp_subsec_nanos();
let timeunit = if nanoseconds % 1_000 != 0 {
PoSQLTimeUnit::Nanosecond
} else if nanoseconds % 1_000_000 != 0 {
PoSQLTimeUnit::Microsecond
} else if nanoseconds % 1_000_000_000 != 0 {
PoSQLTimeUnit::Millisecond
} else {
PoSQLTimeUnit::Second
};
let _ = dt.timestamp_subsec_nanos();
let timeunit = PoSQLTimeUnit::Nanosecond;

Ok(PoSQLTimestamp {
timestamp: dt.with_timezone(&Utc),
Expand Down Expand Up @@ -130,7 +122,7 @@ impl PoSQLTimestamp {
match Utc.timestamp_opt(epoch, 0) {
LocalResult::Single(timestamp) => Ok(PoSQLTimestamp {
timestamp,
timeunit: PoSQLTimeUnit::Second,
timeunit: PoSQLTimeUnit::Nanosecond,
timezone: PoSQLTimeZone::utc(),
}),
LocalResult::Ambiguous(earliest, latest) => Err(PoSQLTimestampError::Ambiguous{ error:
Expand All @@ -156,9 +148,10 @@ mod tests {
#[test]
fn test_unix_epoch_timestamp_parsing() {
let unix_time = 1_231_006_505; // Example Unix timestamp (seconds since epoch)
let expected_datetime = Utc.timestamp_opt(unix_time, 0).unwrap();
let expected_unit = PoSQLTimeUnit::Second; // Assuming basic second precision for Unix timestamp
let input = unix_time; // Simulate input as string since Unix times are often transmitted as strings
let unix_time_in_nanoseconds = unix_time * 1_000_000_000;
let expected_datetime = Utc.timestamp_opt(unix_time_in_nanoseconds, 0).unwrap();
let expected_unit = PoSQLTimeUnit::Nanosecond;
let input = unix_time_in_nanoseconds; // Passed to `to_timestamp` directly as an integer epoch value (not as a string)
let result = PoSQLTimestamp::to_timestamp(input).unwrap();

assert_eq!(result.timestamp, expected_datetime);
Expand Down Expand Up @@ -208,16 +201,6 @@ mod tests {
);
}

#[test]
fn test_timestamp_with_seconds() {
let input = "2023-06-26T12:34:56Z";
let expected_time = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap();
let expected_unit = PoSQLTimeUnit::Second;
let result = PoSQLTimestamp::try_from(input).unwrap();
assert_eq!(result.timestamp, expected_time);
assert_eq!(result.timeunit, expected_unit);
}

#[test]
fn test_general_parsing_error() {
// This test assumes that there's a catch-all parsing error case that isn't covered by the more specific errors.
Expand Down
44 changes: 1 addition & 43 deletions crates/proof-of-sql-parser/src/posql_time/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,13 @@ use serde::{Deserialize, Serialize};
#[allow(clippy::module_name_repetitions)]
#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq)]
pub enum PoSQLTimeUnit {
/// Represents seconds with precision 0: ex "2024-06-20 12:34:56"
Second,
/// Represents milliseconds with precision 3: ex "2024-06-20 12:34:56.123"
Millisecond,
/// Represents microseconds with precision 6: ex "2024-06-20 12:34:56.123456"
Microsecond,
/// Represents nanoseconds with precision 9: ex "2024-06-20 12:34:56.123456789"
/// Represents nanoseconds with precision 9 (the default unit): ex "2024-06-20 12:34:56.123456789"
Nanosecond,
}

impl From<PoSQLTimeUnit> for u64 {
fn from(value: PoSQLTimeUnit) -> u64 {
match value {
PoSQLTimeUnit::Second => 0,
PoSQLTimeUnit::Millisecond => 3,
PoSQLTimeUnit::Microsecond => 6,
PoSQLTimeUnit::Nanosecond => 9,
}
}
Expand All @@ -31,9 +22,6 @@ impl TryFrom<&str> for PoSQLTimeUnit {
type Error = PoSQLTimestampError;
fn try_from(value: &str) -> Result<Self, PoSQLTimestampError> {
match value {
"0" => Ok(PoSQLTimeUnit::Second),
"3" => Ok(PoSQLTimeUnit::Millisecond),
"6" => Ok(PoSQLTimeUnit::Microsecond),
"9" => Ok(PoSQLTimeUnit::Nanosecond),
_ => Err(PoSQLTimestampError::UnsupportedPrecision {
error: value.into(),
Expand All @@ -45,9 +33,6 @@ impl TryFrom<&str> for PoSQLTimeUnit {
impl fmt::Display for PoSQLTimeUnit {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
PoSQLTimeUnit::Second => write!(f, "seconds (precision: 0)"),
PoSQLTimeUnit::Millisecond => write!(f, "milliseconds (precision: 3)"),
PoSQLTimeUnit::Microsecond => write!(f, "microseconds (precision: 6)"),
PoSQLTimeUnit::Nanosecond => write!(f, "nanoseconds (precision: 9)"),
}
}
Expand All @@ -64,9 +49,6 @@ mod time_unit_tests {

#[test]
fn test_valid_precisions() {
assert_eq!(PoSQLTimeUnit::try_from("0"), Ok(PoSQLTimeUnit::Second));
assert_eq!(PoSQLTimeUnit::try_from("3"), Ok(PoSQLTimeUnit::Millisecond));
assert_eq!(PoSQLTimeUnit::try_from("6"), Ok(PoSQLTimeUnit::Microsecond));
assert_eq!(PoSQLTimeUnit::try_from("9"), Ok(PoSQLTimeUnit::Nanosecond));
}

Expand All @@ -83,30 +65,6 @@ mod time_unit_tests {
));
}
}

#[test]
fn test_rfc3339_timestamp_with_milliseconds() {
let input = "2023-06-26T12:34:56.123Z";
let expected = Utc.ymd(2023, 6, 26).and_hms_milli(12, 34, 56, 123);
let result = PoSQLTimestamp::try_from(input).unwrap();
assert_eq!(result.timeunit(), PoSQLTimeUnit::Millisecond);
assert_eq!(
result.timestamp().timestamp_millis(),
expected.timestamp_millis()
);
}

#[test]
fn test_rfc3339_timestamp_with_microseconds() {
let input = "2023-06-26T12:34:56.123456Z";
let expected = Utc.ymd(2023, 6, 26).and_hms_micro(12, 34, 56, 123_456);
let result = PoSQLTimestamp::try_from(input).unwrap();
assert_eq!(result.timeunit(), PoSQLTimeUnit::Microsecond);
assert_eq!(
result.timestamp().timestamp_micros(),
expected.timestamp_micros()
);
}
#[test]
fn test_rfc3339_timestamp_with_nanoseconds() {
let input = "2023-06-26T12:34:56.123456789Z";
Expand Down
2 changes: 1 addition & 1 deletion crates/proof-of-sql/benches/bench_append_rows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ pub fn generate_random_owned_table<S: Scalar>(
"int" => columns.push(int(&*identifier, vec![rng.gen::<i32>(); num_rows])),
"timestamptz" => columns.push(timestamptz(
&*identifier,
PoSQLTimeUnit::Second,
PoSQLTimeUnit::Nanosecond,
TimezoneInfo::None,
vec![rng.gen::<i64>(); num_rows],
)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ impl ArrayRefExt for ArrayRef {
ArrowTimeUnit::Second => {
if let Some(array) = self.as_any().downcast_ref::<TimestampSecondArray>() {
Ok(Column::TimestampTZ(
PoSQLTimeUnit::Second,
PoSQLTimeUnit::Nanosecond,
timezone.into(),
&array.values()[range.start..range.end],
))
Expand All @@ -223,7 +223,7 @@ impl ArrayRefExt for ArrayRef {
self.as_any().downcast_ref::<TimestampMillisecondArray>()
{
Ok(Column::TimestampTZ(
PoSQLTimeUnit::Millisecond,
PoSQLTimeUnit::Nanosecond,
timezone.into(),
&array.values()[range.start..range.end],
))
Expand All @@ -238,7 +238,7 @@ impl ArrayRefExt for ArrayRef {
self.as_any().downcast_ref::<TimestampMicrosecondArray>()
{
Ok(Column::TimestampTZ(
PoSQLTimeUnit::Microsecond,
PoSQLTimeUnit::Nanosecond,
timezone.into(),
&array.values()[range.start..range.end],
))
Expand Down Expand Up @@ -315,7 +315,7 @@ mod tests {
let result = array.to_column::<TestScalar>(&alloc, &(1..3), None);
assert_eq!(
result.unwrap(),
Column::TimestampTZ(PoSQLTimeUnit::Second, TimezoneInfo::None, &data[1..3])
Column::TimestampTZ(PoSQLTimeUnit::Nanosecond, TimezoneInfo::None, &data[1..3])
);
}

Expand All @@ -333,7 +333,7 @@ mod tests {
.unwrap();
assert_eq!(
result,
Column::TimestampTZ(PoSQLTimeUnit::Second, TimezoneInfo::None, &[])
Column::TimestampTZ(PoSQLTimeUnit::Nanosecond, TimezoneInfo::None, &[])
);
}

Expand All @@ -349,7 +349,7 @@ mod tests {
let result = array.to_column::<DoryScalar>(&alloc, &(1..1), None);
assert_eq!(
result.unwrap(),
Column::TimestampTZ(PoSQLTimeUnit::Second, TimezoneInfo::None, &[])
Column::TimestampTZ(PoSQLTimeUnit::Nanosecond, TimezoneInfo::None, &[])
);
}

Expand Down Expand Up @@ -1016,7 +1016,7 @@ mod tests {
.unwrap();
assert_eq!(
result,
Column::TimestampTZ(PoSQLTimeUnit::Second, TimezoneInfo::None, &data[..])
Column::TimestampTZ(PoSQLTimeUnit::Nanosecond, TimezoneInfo::None, &data[..])
);
}

Expand Down Expand Up @@ -1086,7 +1086,7 @@ mod tests {
array
.to_column::<TestScalar>(&alloc, &(1..3), None)
.unwrap(),
Column::TimestampTZ(PoSQLTimeUnit::Second, TimezoneInfo::None, &data[1..3])
Column::TimestampTZ(PoSQLTimeUnit::Nanosecond, TimezoneInfo::None, &data[1..3])
);
}

Expand Down Expand Up @@ -1144,7 +1144,7 @@ mod tests {
.unwrap();
assert_eq!(
result,
Column::TimestampTZ(PoSQLTimeUnit::Second, TimezoneInfo::None, &[])
Column::TimestampTZ(PoSQLTimeUnit::Nanosecond, TimezoneInfo::None, &[])
);
}
}
25 changes: 6 additions & 19 deletions crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,9 @@ impl From<&ColumnType> for DataType {
}
ColumnType::VarChar => DataType::Utf8,
ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"),
ColumnType::TimestampTZ(timeunit, timezone) => {
ColumnType::TimestampTZ(_, timezone) => {
let arrow_timezone = Some(Arc::from(timezone.to_string()));
let arrow_timeunit = match timeunit {
PoSQLTimeUnit::Second => ArrowTimeUnit::Second,
PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond,
PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond,
PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond,
};
let arrow_timeunit = ArrowTimeUnit::Nanosecond;
DataType::Timestamp(arrow_timeunit, arrow_timezone)
}
}
Expand All @@ -50,18 +45,10 @@ impl TryFrom<DataType> for ColumnType {
DataType::Decimal256(precision, scale) if precision <= 75 => {
Ok(ColumnType::Decimal75(Precision::new(precision)?, scale))
}
DataType::Timestamp(time_unit, timezone_option) => {
let posql_time_unit = match time_unit {
ArrowTimeUnit::Second => PoSQLTimeUnit::Second,
ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond,
ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond,
ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond,
};
Ok(ColumnType::TimestampTZ(
posql_time_unit,
PoSQLTimeZone::try_from(&timezone_option)?.into(),
))
}
DataType::Timestamp(_, timezone_option) => Ok(ColumnType::TimestampTZ(
PoSQLTimeUnit::Nanosecond,
PoSQLTimeZone::try_from(&timezone_option)?.into(),
)),
DataType::Utf8 => Ok(ColumnType::VarChar),
_ => Err(format!("Unsupported arrow data type {data_type:?}")),
}
Expand Down
55 changes: 5 additions & 50 deletions crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ use alloc::sync::Arc;
use arrow::{
array::{
ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array,
Int64Array, Int8Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray, TimestampSecondArray,
Int64Array, Int8Array, StringArray, TimestampNanosecondArray,
},
datatypes::{i256, DataType, Schema, SchemaRef, TimeUnit as ArrowTimeUnit},
error::ArrowError,
Expand Down Expand Up @@ -104,12 +103,7 @@ impl<S: Scalar> From<OwnedColumn<S>> for ArrayRef {
}
OwnedColumn::Scalar(_) => unimplemented!("Cannot convert Scalar type to arrow type"),
OwnedColumn::VarChar(col) => Arc::new(StringArray::from(col)),
OwnedColumn::TimestampTZ(time_unit, _, col) => match time_unit {
PoSQLTimeUnit::Second => Arc::new(TimestampSecondArray::from(col)),
PoSQLTimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from(col)),
PoSQLTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(col)),
PoSQLTimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from(col)),
},
OwnedColumn::TimestampTZ(_, _, col) => Arc::new(TimestampNanosecondArray::from(col)),
}
}
}
Expand Down Expand Up @@ -228,48 +222,6 @@ impl<S: Scalar> TryFrom<&ArrayRef> for OwnedColumn<S> {
.collect(),
)),
DataType::Timestamp(time_unit, timezone) => match time_unit {
ArrowTimeUnit::Second => {
let array = value
.as_any()
.downcast_ref::<TimestampSecondArray>()
.expect(
"This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits",
);
let timestamps = array.values().iter().copied().collect::<Vec<i64>>();
Ok(OwnedColumn::TimestampTZ(
PoSQLTimeUnit::Second,
PoSQLTimeZone::try_from(timezone)?.into(),
timestamps,
))
}
ArrowTimeUnit::Millisecond => {
let array = value
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
.expect(
"This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits",
);
let timestamps = array.values().iter().copied().collect::<Vec<i64>>();
Ok(OwnedColumn::TimestampTZ(
PoSQLTimeUnit::Millisecond,
PoSQLTimeZone::try_from(timezone)?.into(),
timestamps,
))
}
ArrowTimeUnit::Microsecond => {
let array = value
.as_any()
.downcast_ref::<TimestampMicrosecondArray>()
.expect(
"This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits",
);
let timestamps = array.values().iter().copied().collect::<Vec<i64>>();
Ok(OwnedColumn::TimestampTZ(
PoSQLTimeUnit::Microsecond,
PoSQLTimeZone::try_from(timezone)?.into(),
timestamps,
))
}
ArrowTimeUnit::Nanosecond => {
let array = value
.as_any()
Expand All @@ -284,6 +236,9 @@ impl<S: Scalar> TryFrom<&ArrayRef> for OwnedColumn<S> {
timestamps,
))
}
_ => Err(OwnedArrowConversionError::UnsupportedType {
datatype: DataType::Timestamp(time_unit.clone(), timezone.clone()),
}),
},
&data_type => Err(OwnedArrowConversionError::UnsupportedType {
datatype: data_type.clone(),
Expand Down
Loading

0 comments on commit 04a1c60

Please sign in to comment.