Skip to content

Commit

Permalink
parser: improve performance by first parsing a naive date
Browse files Browse the repository at this point in the history
  • Loading branch information
mdibaiee committed Oct 6, 2023
1 parent 0f27b4c commit 8573cd9
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 21 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ byteorder = "1.4"
caseless = "0.2"
chardetng = "0.1"
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = { version = "0.8" }
clap = { version = "3.2", features = ["derive", "env"] }
colored_json = "3"
comfy-table = "6.1"
Expand Down
36 changes: 18 additions & 18 deletions crates/parser/src/format/sanitize/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,33 @@ struct DatetimeSanitizer {
default_offset: time::UtcOffset,
}

// Here we are trying to parse non-ambiguous, non-RFC3339 dates and format them as RFC3339,
// so we skip any value that is already valid RFC3339 and pass it through as-is
// Here we are trying to parse non-RFC3339 dates
fn datetime_to_rfc3339(val: &mut Value, default_offset: time::UtcOffset) {
match val {
Value::String(s) => {
let offset_format = format_description!(
version = 2,
"[first
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]]Z]
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]]z]
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]][offset_hour]:[offset_minute]]
]"
);

let primitive_format = format_description!(
version = 2,
"[year]-[month]-[day][optional [T]][optional [ ]][hour]:[minute]:[second][optional [.[subsecond]]]"
"[year]-[month]-[day][optional [T]][optional [ ]][hour]:[minute]:[second][optional [.[subsecond]]][optional [Z]][optional [z]][optional [[offset_hour]:[offset_minute]]]"
);

let parsed_with_tz = time::OffsetDateTime::parse(&s, offset_format);
let parsed_no_tz = time::PrimitiveDateTime::parse(&s, primitive_format);
let parsed_no_tz = time::PrimitiveDateTime::parse(&s, primitive_format).ok();

let parsed_with_tz = if parsed_no_tz.is_some() {
let offset_format = format_description!(
version = 2,
"[first
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]]Z]
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]]z]
[[year]-[month]-[day] [hour]:[minute]:[second][optional [.[subsecond]]][offset_hour]:[offset_minute]]
]"
);

time::OffsetDateTime::parse(&s, offset_format).ok()
} else { None };

if let Ok(parsed) = parsed_with_tz {
if let Some(parsed) = parsed_with_tz {
*s = parsed.format(&time::format_description::well_known::Rfc3339).unwrap();
} else if let Ok(parsed) = parsed_no_tz {
} else if let Some(parsed) = parsed_no_tz {
*s = parsed.assume_offset(default_offset).format(&time::format_description::well_known::Rfc3339).unwrap();
}
}
Expand Down Expand Up @@ -76,9 +78,7 @@ pub enum DatetimeSanitizeError {
}

pub fn sanitize_datetime(config: &ParseConfig, output: Output) -> Result<Output, DatetimeSanitizeError> {
eprintln!("sanitize_datetime");
let offset = time::UtcOffset::parse(&config.default_offset, format_description!("[offset_hour]:[offset_minute]")).map_err(DatetimeSanitizeError::OffsetParseError)?;
eprintln!("offset: {:?}", offset);
let sanitizer = DatetimeSanitizer {
from: output,
default_offset: offset,
Expand Down
2 changes: 0 additions & 2 deletions crates/parser/tests/sanitize_test.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
mod testutil;

use chrono::DateTime;

use parser::ParseConfig;
use testutil::{input_for_file, run_test};

Expand Down

0 comments on commit 8573cd9

Please sign in to comment.