Skip to content

Commit

Permalink
parser: refactor tests so it is easier to test different cases
Browse files Browse the repository at this point in the history
  • Loading branch information
mdibaiee committed Oct 12, 2023
1 parent 3dd6d5d commit 9514bfb
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 32 deletions.
17 changes: 10 additions & 7 deletions crates/parser/src/format/sanitize/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{ParseConfig, Output, format::ParseResult, ParseError};
use crate::{ParseConfig, Output, format::ParseResult};
use time::macros::format_description;
use serde_json::Value;

Expand All @@ -11,6 +11,11 @@ struct DatetimeSanitizer {
fn datetime_to_rfc3339(val: &mut Value, default_offset: time::UtcOffset) {
match val {
Value::String(s) => {
// First, parse with a relaxed format that accepts every timestamp layout we support.
// This step only tells us whether the value is a timestamp we can parse at all.
// If it succeeds, we then try a stricter format that requires a timezone
// (`Z`, `z`, or an explicit offset). If the stricter parse also succeeds we keep
// the parsed timezone; otherwise we fall back to the provided default offset.
let primitive_format = format_description!(
version = 2,
"[year]-[month]-[day][optional [T]][optional [ ]][hour]:[minute]:[second][optional [.[subsecond]]][optional [Z]][optional [z]][optional [[offset_hour]:[offset_minute]]]"
Expand All @@ -22,9 +27,9 @@ fn datetime_to_rfc3339(val: &mut Value, default_offset: time::UtcOffset) {
let offset_format = format_description!(
version = 2,
"[first
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]]Z]
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]]z]
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]][offset_hour]:[offset_minute]]
[[year]-[month]-[day][optional [T]][optional [ ]][hour]:[minute]:[second][optional [.[subsecond]]]Z]
[[year]-[month]-[day][optional [T]][optional [ ]][hour]:[minute]:[second][optional [.[subsecond]]]z]
[[year]-[month]-[day][optional [T]][optional [ ]][hour]:[minute]:[second][optional [.[subsecond]]][offset_hour]:[offset_minute]]
]"
);

Expand Down Expand Up @@ -64,9 +69,7 @@ impl Iterator for DatetimeSanitizer {
datetime_to_rfc3339(&mut val, self.default_offset);
Ok(val)
}
Err(e) => {
Err(ParseError::Parse(Box::new(e)))
}
e => e
})
}
}
Expand Down
2 changes: 0 additions & 2 deletions crates/parser/tests/examples/datetimes-naive.csv

This file was deleted.

2 changes: 0 additions & 2 deletions crates/parser/tests/examples/datetimes.csv

This file was deleted.

52 changes: 31 additions & 21 deletions crates/parser/tests/sanitize_test.rs
Original file line number Diff line number Diff line change
@@ -1,43 +1,53 @@
mod testutil;

use std::fs::File;
use std::io::Write;

use parser::ParseConfig;
use testutil::{input_for_file, run_test};
use tempfile::tempdir;

fn test_sanitize(description: &str, input: &str, expected: &str, default_offset: &str) {
let dir = tempdir().unwrap();
let path = dir.path().join("sanitize-test.csv");
let mut f = File::create(path.clone()).unwrap();
writeln!(f, "header").unwrap();
writeln!(f, "\"{}\"", input).unwrap();

#[test]
fn sanitize_datetime_to_rfc3339() {
let path = "tests/examples/datetimes.csv";
let cfg = ParseConfig {
filename: Some(path.to_string()),
filename: Some(path.to_string_lossy().to_string()),
default_offset: default_offset.to_string(),
..Default::default()
};

let input = input_for_file(path);
let output = run_test(&cfg, input);
output.assert_success(1);

let expected_first_row = "2020-01-01T00:00:00Z";
for value in output.parsed[0].as_object().unwrap().values() {
assert_eq!(expected_first_row, value.as_str().unwrap())
assert_eq!(expected, value.as_str().unwrap(), "{}", description)
}
}

#[test]
fn sanitize_datetime_to_rfc3339_offset() {
let path = "tests/examples/datetimes-naive.csv";
let cfg = ParseConfig {
default_offset: "-05:00".to_string(),
filename: Some(path.to_string()),
..Default::default()
};

let input = input_for_file(path);
let output = run_test(&cfg, input);
output.assert_success(1);
fn sanitize_datetime_to_rfc3339() {
// With Timezone
test_sanitize("tz rfc3339 utc" , "2020-01-01T12:34:56Z" , "2020-01-01T12:34:56Z" , "+00:00");
test_sanitize("tz rfc3339 offset" , "2020-01-01T12:34:56-04:00" , "2020-01-01T12:34:56-04:00" , "+00:00");
test_sanitize("tz rfc3339 fractional" , "2020-01-01T12:34:56.999999999Z" , "2020-01-01T12:34:56.999999999Z" , "+00:00");
test_sanitize("tz rfc3339 fractional + offset" , "2020-01-01T12:34:56.999999999-04:00" , "2020-01-01T12:34:56.999999999-04:00" , "+00:00");
test_sanitize("tz spaced fractional + offset" , "2020-01-01 12:34:56.999999999-04:00" , "2020-01-01T12:34:56.999999999-04:00" , "+00:00");
test_sanitize("tz spaced fractional + utc" , "2020-01-01 12:34:56.999999999Z" , "2020-01-01T12:34:56.999999999Z" , "+00:00");
test_sanitize("tz spaced offset" , "2020-01-01 12:34:56-04:00" , "2020-01-01T12:34:56-04:00" , "+00:00");
test_sanitize("tz spaced utc" , "2020-01-01 12:34:56Z" , "2020-01-01T12:34:56Z" , "+00:00");

let expected_first_row = "2020-01-01T00:00:00-05:00";
for value in output.parsed[0].as_object().unwrap().values() {
assert_eq!(expected_first_row, value.as_str().unwrap())
}
// Without Timezone
test_sanitize("naive t" , "2020-01-01T12:34:56" , "2020-01-01T12:34:56Z" , "+00:00");
test_sanitize("naive t + fractional" , "2020-01-01T12:34:56.999999999" , "2020-01-01T12:34:56.999999999Z" , "+00:00");
test_sanitize("naive t + fractional 2" , "2020-01-01T12:34:56.999999999" , "2020-01-01T12:34:56.999999999+04:00" , "+04:00");
test_sanitize("naive space" , "2020-01-01 12:34:56" , "2020-01-01T12:34:56Z" , "+00:00");
test_sanitize("naive space + fractional" , "2020-01-01 12:34:56.999999999" , "2020-01-01T12:34:56.999999999Z" , "+00:00");
test_sanitize("naive space + fractional 2" , "2020-01-01 12:34:56.999999999" , "2020-01-01T12:34:56.999999999+04:00" , "+04:00");
}

#[test]
Expand Down

0 comments on commit 9514bfb

Please sign in to comment.