Skip to content

Commit

Permalink
Revert "parser: sanitize timestamps to RFC3339"
Browse files Browse the repository at this point in the history
This reverts commit af5e785.
  • Loading branch information
mdibaiee committed Oct 12, 2023
1 parent ed9622c commit 4809671
Show file tree
Hide file tree
Showing 13 changed files with 3 additions and 297 deletions.
76 changes: 0 additions & 76 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ byteorder = "1.4"
caseless = "0.2"
chardetng = "0.1"
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = { version = "0.8" }
clap = { version = "3.2", features = ["derive", "env"] }
colored_json = "3"
comfy-table = "6.1"
Expand Down
1 change: 0 additions & 1 deletion crates/parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ bytes = { workspace = true }
caseless = { workspace = true }
chardetng = { workspace = true }
chrono = { workspace = true }
chrono-tz = { workspace = true }
csv = { workspace = true }
encoding_rs = { workspace = true }
flate2 = { workspace = true }
Expand Down
28 changes: 2 additions & 26 deletions crates/parser/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,15 +528,11 @@ impl schemars::JsonSchema for ErrorThreshold {
}
}

/// Supplies the fallback value for `ParseConfig::default_timezone`: the string `"UTC"`.
///
/// Referenced by name from the field's `#[serde(default = "...")]` attribute and from the
/// hand-written `Default` impl, so both paths produce the same value.
fn default_timezone_string() -> String {
    String::from("UTC")
}

// Fields annotated with `schemars(skip)` will not appear in the JSON schema, and thus won't be
// shown in the UI. These are things that connectors set programmatically when they generate the
// config. We could consider moving these fields to be CLI arguments if we want a clearer
// separation.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
#[schemars(
title = "Parser Configuration",
description = "Configures how files are parsed"
Expand All @@ -554,11 +550,6 @@ pub struct ParseConfig {
#[serde(default)]
pub compression: DefaultNullIsAutomatic<Compression>,

/// The default timezone to use when parsing timestamps that do not have a timezone. Timezones
/// must be specified as a valid IANA name. Defaults to UTC.
#[serde(default="default_timezone_string")]
pub default_timezone: String,

/// filename is used for format inference. It will be ignored if `format` is specified.
#[serde(default)]
#[schemars(skip)]
Expand Down Expand Up @@ -591,21 +582,6 @@ pub struct ParseConfig {
pub content_encoding: Option<String>,
}

// `Default` is written by hand rather than derived because `default_timezone` must start out
// as the value returned by `default_timezone_string()` instead of an empty `String`.
impl Default for ParseConfig {
    /// Builds a `ParseConfig` in which every field takes its type's own default, except
    /// `default_timezone`, which is taken from `default_timezone_string()`.
    fn default() -> Self {
        Self {
            format: Default::default(),
            compression: Default::default(),
            // Same helper that serde uses when the field is missing from the input.
            default_timezone: default_timezone_string(),
            filename: Default::default(),
            add_record_offset: Default::default(),
            add_values: Default::default(),
            content_type: Default::default(),
            content_encoding: Default::default(),
        }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
#[error("failed to read config: {0}")]
Expand Down Expand Up @@ -769,7 +745,7 @@ mod test {
}
},
"filename": "tha-file",
"compression": "zip"
"compression": "zip",
});

let r1: ParseConfig = serde_json::from_value(c1).expect("deserialize config");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ expression: schema
}
]
},
"defaultTimezone": {
"description": "The default timezone to use when parsing timestamps that do not have a timezone. Timezones must be specified as a valid IANA name. Defaults to UTC.",
"default": "UTC",
"type": "string"
},
"format": {
"description": "Determines how to parse the contents. The default, 'Auto', will try to determine the format automatically based on the file extension or MIME type, if available.",
"default": {
Expand Down
7 changes: 1 addition & 6 deletions crates/parser/src/format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ pub mod avro;
pub mod character_separated;
pub mod json;
pub mod protobuf;
pub mod sanitize;

use crate::config::ErrorThreshold;
use crate::decorate::{AddFieldError, Decorator};
Expand Down Expand Up @@ -47,9 +46,6 @@ pub enum ParseError {

#[error("error limit exceeded")]
ErrorLimitExceeded(ErrorThreshold),

#[error("failed to sanitize documents: {0}")]
SanitizeError(#[from] sanitize::SanitizeError),
}

/// Runs format inference if the config does not specify a `format`. The expectation is that more
Expand Down Expand Up @@ -166,8 +162,7 @@ fn parse_file(
starting_offset: u64,
) -> Result<u64, ParseError> {
let output = parser.parse(input)?;
let sanitized_output = sanitize::sanitize_output(&config, output)?;
format_output(&config, sanitized_output, dest, starting_offset)
format_output(&config, output, dest, starting_offset)
}

fn parser_for(format: Format) -> Box<dyn Parser> {
Expand Down
99 changes: 0 additions & 99 deletions crates/parser/src/format/sanitize/datetime.rs

This file was deleted.

14 changes: 0 additions & 14 deletions crates/parser/src/format/sanitize/mod.rs

This file was deleted.

2 changes: 0 additions & 2 deletions crates/parser/tests/examples/datetimes-naive.csv

This file was deleted.

1 change: 0 additions & 1 deletion crates/parser/tests/examples/datetimes-nested.json

This file was deleted.

2 changes: 0 additions & 2 deletions crates/parser/tests/examples/datetimes.csv

This file was deleted.

Loading

0 comments on commit 4809671

Please sign in to comment.