Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional argument forcing syslog parsing to one of the two RFCs #1051

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

170 changes: 137 additions & 33 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,117 @@ repository = "https://github.com/vectordotdev/vrl"
readme = "README.md"
keywords = ["vector", "datadog", "compiler"]
categories = ["compilers"]
rust-version = "1.79" # msrv
rust-version = "1.79" # msrv

[workspace]
members = [
".",
"lib/cli",
"lib/tests",
"lib/fuzz"
]
members = [".", "lib/cli", "lib/tests", "lib/fuzz"]


[features]
default = ["compiler", "value", "diagnostic", "path", "parser", "stdlib", "datadog", "core"]
default = [
"compiler",
"value",
"diagnostic",
"path",
"parser",
"stdlib",
"datadog",
"core",
]

# Main features (on by default)
compiler = ["diagnostic", "path", "parser", "value", "dep:paste", "dep:chrono", "dep:serde", "dep:regex", "dep:bytes", "dep:ordered-float", "dep:chrono-tz", "dep:snafu", "dep:thiserror", "dep:dyn-clone", "dep:indoc", "dep:thiserror", "dep:lalrpop-util"]
value = ["path", "dep:bytes", "dep:regex", "dep:ordered-float", "dep:chrono", "dep:serde_json"]
# Sibling features and optional dependencies enabled by `compiler`; each entry appears once.
compiler = [
  "diagnostic",
  "path",
  "parser",
  "value",
  "dep:paste",
  "dep:chrono",
  "dep:serde",
  "dep:regex",
  "dep:bytes",
  "dep:ordered-float",
  "dep:chrono-tz",
  "dep:snafu",
  "dep:thiserror",
  "dep:dyn-clone",
  "dep:indoc",
  "dep:lalrpop-util",
]
value = [
"path",
"dep:bytes",
"dep:regex",
"dep:ordered-float",
"dep:chrono",
"dep:serde_json",
]
diagnostic = ["dep:codespan-reporting", "dep:termcolor"]
path = ["value", "dep:once_cell", "dep:serde", "dep:snafu", "dep:regex"]
parser = ["path", "diagnostic", "value", "dep:thiserror", "dep:ordered-float", "dep:lalrpop-util"]
parsing = ["value", "compiler", "dep:url", "dep:nom", "dep:regex", "dep:roxmltree", "dep:rust_decimal"]
parser = [
"path",
"diagnostic",
"value",
"dep:thiserror",
"dep:ordered-float",
"dep:lalrpop-util",
]
parsing = [
"value",
"compiler",
"dep:url",
"dep:nom",
"dep:regex",
"dep:roxmltree",
"dep:rust_decimal",
]
core = ["value", "dep:snafu", "dep:nom"]
string_path = []

# Datadog related features (on by default)
datadog = ["datadog_filter", "datadog_grok", "datadog_search"]
datadog_filter = ["path", "datadog_search", "dep:regex", "dep:dyn-clone"]
datadog_grok = ["value", "parsing", "dep:nom", "dep:peeking_take_while", "dep:serde_json", "dep:onig", "dep:lalrpop-util", "dep:thiserror", "dep:chrono", "dep:chrono-tz", "dep:percent-encoding", "dep:fancy-regex"]
datadog_search = ["dep:pest", "dep:pest_derive", "dep:itertools", "dep:once_cell", "dep:regex", "dep:serde"]
datadog_grok = [
"value",
"parsing",
"dep:nom",
"dep:peeking_take_while",
"dep:serde_json",
"dep:onig",
"dep:lalrpop-util",
"dep:thiserror",
"dep:chrono",
"dep:chrono-tz",
"dep:percent-encoding",
"dep:fancy-regex",
]
datadog_search = [
"dep:pest",
"dep:pest_derive",
"dep:itertools",
"dep:once_cell",
"dep:regex",
"dep:serde",
]

# Features that aren't used as often (default off)
cli = ["stdlib", "dep:serde_json", "dep:thiserror", "dep:clap", "dep:exitcode", "dep:webbrowser", "dep:rustyline", "dep:prettytable-rs"]
test_framework = ["compiler", "dep:prettydiff", "dep:serde_json", "dep:ansi_term"]
cli = [
"stdlib",
"dep:serde_json",
"dep:thiserror",
"dep:clap",
"dep:exitcode",
"dep:webbrowser",
"dep:rustyline",
"dep:prettytable-rs",
]
test_framework = [
"compiler",
"dep:prettydiff",
"dep:serde_json",
"dep:ansi_term",
]
arbitrary = ["dep:quickcheck", "dep:arbitrary"]
lua = ["dep:mlua"]
proptest = ["dep:proptest", "dep:proptest-derive"]
Expand Down Expand Up @@ -120,25 +198,31 @@ stdlib = [
cfg-if = "1"

# Optional dependencies
ansi_term = {version = "0.12", optional = true }
ansi_term = { version = "0.12", optional = true }
arbitrary = { version = "1", optional = true, features = ["derive"] }
base16 = { version = "0.2", optional = true }
base62 = { version = "2.0.3", optional = true }
base64 = { version = "0.22", optional = true }
bytes = { version = "1", default-features = false, optional = true }
charset = { version = "0.1", optional = true }
chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"], optional = true }
chrono = { version = "0.4", default-features = false, features = [
"clock",
"serde",
"wasmbind",
], optional = true }
chrono-tz = { version = "0.10", default-features = false, optional = true }
cidr-utils = { version = "0.6", optional = true }
csv = { version = "1", optional = true }
clap = { version = "4", features = ["derive"], optional = true }
codespan-reporting = {version = "0.11", optional = true }
codespan-reporting = { version = "0.11", optional = true }
convert_case = { version = "0.6.0", optional = true }
data-encoding = { version = "2", optional = true }
digest = { version = "0.10", optional = true }
dyn-clone = { version = "1", default-features = false, optional = true }
exitcode = {version = "1", optional = true }
flate2 = { version = "1", default-features = false, features = ["default"], optional = true }
exitcode = { version = "1", optional = true }
flate2 = { version = "1", default-features = false, features = [
"default",
], optional = true }
hex = { version = "0.4", optional = true }
hmac = { version = "0.12", optional = true }
iana-time-zone = { version = "0.1", optional = true }
Expand All @@ -156,19 +240,27 @@ md-5 = { version = "0.10", optional = true }
paste = { version = "1", default-features = false, optional = true }
peeking_take_while = { version = "1", default-features = false, optional = true }
percent-encoding = { version = "2", optional = true }
pest = { version = "2", default-features = false, optional = true, features = ["std"] }
pest_derive = { version = "2", default-features = false, optional = true, features = ["std"] }
pest = { version = "2", default-features = false, optional = true, features = [
"std",
] }
pest_derive = { version = "2", default-features = false, optional = true, features = [
"std",
] }
proptest = { version = "1", optional = true }
proptest-derive = { version = "0.5", optional = true }
prettydiff = {version = "0.7", default-features = false, optional = true }
prettytable-rs = { version = "0.10", default-features = false, optional = true }
quickcheck = { version = "1", optional = true }
quoted_printable = {version = "0.5", optional = true }
quoted_printable = { version = "0.5", optional = true }
psl = { version = "2", optional = true }
psl-types = { version = "2", optional = true }
publicsuffix = { version = "2", optional = true }
rand = { version = "0.8", optional = true }
regex = { version = "1", default-features = false, optional = true, features = ["std", "perf", "unicode"] }
regex = { version = "1", default-features = false, optional = true, features = [
"std",
"perf",
"unicode",
] }
roxmltree = { version = "0.20", optional = true }
rustyline = { version = "14", default-features = false, optional = true }
rust_decimal = { version = "1", optional = true }
Expand All @@ -181,19 +273,21 @@ sha-2 = { package = "sha2", version = "0.10", optional = true }
sha-3 = { package = "sha3", version = "0.10", optional = true }
strip-ansi-escapes = { version = "0.2", optional = true }
snap = { version = "1", optional = true }
syslog_loose = { version = "0.21", optional = true }
termcolor = {version = "1", optional = true }
thiserror ={ version = "1", optional = true }
# NOTE(review): this replaces the crates.io release ("0.21") with a pinned git
# revision. Confirm this is temporary: as far as I know crates.io does not accept
# packages whose dependencies are specified by git source only — verify before merging.
syslog_loose = { git = "https://github.com/itkovian/syslog-loose", rev = "7acfd39be5bd", optional = true }
termcolor = { version = "1", optional = true }
thiserror = { version = "1", optional = true }
tracing = { version = "0.1", default-features = false }
uaparser = { version = "0.6", default-features = false, optional = true }
utf8-width = { version = "0.1", optional = true }
url = { version = "2", optional = true }
snafu = { version = "0.8", optional = true }
webbrowser = { version = "1.0", default-features = false, optional = true }
woothee = { version = "0.13", optional = true }
community-id = { version = "0.2", optional = true}
community-id = { version = "0.2", optional = true }

zstd = { version = "0.13", default-features = false, features = ["wasm"], optional = true }
zstd = { version = "0.13", default-features = false, features = [
"wasm",
], optional = true }

# Cryptography
aes = { version = "0.8", optional = true }
Expand All @@ -208,8 +302,10 @@ cfb-mode = { version = "0.8", optional = true }
ofb = { version = "0.6", optional = true }

# Protobuf support.
prost = { version = "0.13", default-features = false, optional = true, features = ["std"]}
prost-reflect = { version = "0.14", default-features = false, optional = true}
prost = { version = "0.13", default-features = false, optional = true, features = [
"std",
] }
prost-reflect = { version = "0.14", default-features = false, optional = true }

# Dependencies used for non-WASM
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
Expand All @@ -218,7 +314,15 @@ domain = { version = "0.10.3", optional = true, features = ["resolv-sync", "serd
hostname = { version = "0.4", optional = true }
grok = { version = "2", optional = true }
onig = { version = "6", default-features = false, optional = true }
tokio = { version = "1.38", optional = true, features = ["io-util", "macros", "net", "time", "sync", "rt", "rt-multi-thread" ] }
tokio = { version = "1.38", optional = true, features = [
"io-util",
"macros",
"net",
"time",
"sync",
"rt",
"rt-multi-thread",
] }
uuid = { version = "1", features = ["v4", "v7"], optional = true }

# Dependencies used for WASM
Expand Down
6 changes: 5 additions & 1 deletion src/stdlib/parse_linux_authorization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ impl Function for ParseLinuxAuthorization {
let value = arguments.required("value");

// The parse_linux_authorization function is just an alias for parse_syslog
Ok(ParseSyslogFn { value }.as_expr())
Ok(ParseSyslogFn {
value: value,
variant: "either".into(),
}
.as_expr())
}
}
60 changes: 45 additions & 15 deletions src/stdlib/parse_syslog.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
use crate::compiler::prelude::*;
use chrono::{DateTime, Datelike, Utc};
use nom::AsBytes;
use std::collections::BTreeMap;
use syslog_loose::{IncompleteDate, Message, ProcId, Protocol, Variant};

/// Parses `value` as a syslog message and converts the parsed message into a
/// VRL value with `message_to_value`.
///
/// `value` is read through `try_bytes_utf8_lossy`; `variant` is handed to
/// `syslog_loose::parse_message_with_year_exact_tz` unchanged.
///
/// # Errors
///
/// Any error from reading `value` or from the `syslog_loose` parser is
/// propagated to the caller via `?`.
pub(crate) fn parse_syslog(value: Value, ctx: &Context) -> Resolved {
pub(crate) fn parse_syslog(value: Value, variant: Variant, ctx: &Context) -> Resolved {
let message = value.try_bytes_utf8_lossy()?;
// A named timezone from the context is forwarded to the parser; the local
// timezone is represented as `None`.
let timezone = match ctx.timezone() {
TimeZone::Local => None,
TimeZone::Named(tz) => Some(*tz),
};
let parsed = syslog_loose::parse_message_with_year_exact_tz(
&message,
resolve_year,
timezone,
Variant::Either,
)?;
// `resolve_year` is passed as the callback the parser uses together with the
// timezone and the requested variant.
let parsed =
syslog_loose::parse_message_with_year_exact_tz(&message, resolve_year, timezone, variant)?;
Ok(message_to_value(parsed))
}

Expand All @@ -27,11 +24,18 @@ impl Function for ParseSyslog {
}

// Declares the arguments accepted by the function: a required `value` and an
// optional `variant`, both of kind `BYTES`.
fn parameters(&self) -> &'static [Parameter] {
&[Parameter {
keyword: "value",
kind: kind::BYTES,
required: true,
}]
&[
// The syslog message to parse.
Parameter {
keyword: "value",
kind: kind::BYTES,
required: true,
},
// Optional selector for the syslog grammar; the accepted names are checked
// in `compile`.
Parameter {
keyword: "variant",
kind: kind::BYTES,
required: false,
},
]
}

fn examples(&self) -> &'static [Example] {
Expand Down Expand Up @@ -59,26 +63,40 @@ impl Function for ParseSyslog {

fn compile(
&self,
_state: &state::TypeState,
state: &state::TypeState,
_ctx: &mut FunctionCompileContext,
arguments: ArgumentList,
) -> Compiled {
let variants = vec!["either".into(), "rfc3164".into(), "rfc5424".into()];

let value = arguments.required("value");
let variant = arguments
.optional_enum("variant", &variants, state)?
.unwrap_or_else(|| "either".into())
.try_bytes()
.expect("syslog variant is not bytes");

Ok(ParseSyslogFn { value }.as_expr())
Ok(ParseSyslogFn { value, variant }.as_expr())
}
}

/// Expression produced when compiling `parse_syslog`; the
/// `parse_linux_authorization` alias builds it as well, with `variant` set to
/// `either`.
#[derive(Debug, Clone)]
pub(crate) struct ParseSyslogFn {
/// Expression that is resolved to obtain the syslog message to parse.
pub(crate) value: Box<dyn Expression>,
/// Name of the syslog grammar to use; expected to be `rfc3164`, `rfc5424`
/// or `either`.
pub(crate) variant: Bytes,
}

impl FunctionExpression for ParseSyslogFn {
/// Resolves the `value` expression and parses the result as a syslog message.
///
/// The stored `variant` bytes select the grammar handed to `parse_syslog`:
/// `rfc3164`, `rfc5424` or `either`.
///
/// # Errors
///
/// Returns an error if `value` fails to resolve, if `variant` holds an
/// unrecognised name, or if `parse_syslog` fails.
fn resolve(&self, ctx: &mut Context) -> Resolved {
    let value = self.value.resolve(ctx)?;

    // `compile` only admits the three names below, but the struct's fields are
    // `pub(crate)` and it is also built directly elsewhere in the crate, so an
    // unknown name is reported as a regular error instead of panicking while a
    // program is running.
    let variant = match self.variant.as_bytes() {
        b"rfc3164" => Variant::RFC3164,
        b"rfc5424" => Variant::RFC5424,
        b"either" => Variant::Either,
        other => {
            return Err(format!(
                "unknown syslog variant: {}",
                String::from_utf8_lossy(other)
            )
            .into())
        }
    };

    parse_syslog(value, variant, ctx)
}

fn type_def(&self, _: &state::TypeState) -> TypeDef {
Expand Down Expand Up @@ -356,5 +374,17 @@ mod tests {
}),
tdef: TypeDef::object(inner_kind()).fallible(),
}

force_rfc_3164 {
args: func_args![value: "2024-09-19T15:39:45.469+02:00 node1234 slurmstepd[548422]: [65684352.batch] done with job", variant: "rfc3164"],
want: Ok(btreemap!{
"appname" => "slurmstepd",
"hostname" => "node1234",
"message" => "[65684352.batch] done with job",
"procid" => 548422,
"timestamp" => chrono::Utc.ymd(2024, 09, 19).and_hms_milli(13,39,45,469),
}),
tdef: TypeDef::object(inner_kind()).fallible(),
}
];
}