diff --git a/Cargo.toml b/Cargo.toml index 3feea5f..94312f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,11 +28,12 @@ serde={version="1.0", features=["derive"], optional = true} itertools = {version = "0.11.0", optional = true} ipnet = {version="2.7", optional = true} bitflags = {version="2.3.3", features = ["serde"], optional = true} +thiserror = {version = "1.0.44", optional = true} +smallvec = {version = "1.11.0", features = ["union"], optional = true} ####################### # Parser dependencies # ####################### -bytes = {version = "1.4.0", optional = true} hex= {version = "0.4.3", optional = true} # bmp/openbmp parsing log= {version = "0.4", optional = true } oneio = {version= "0.11.0", features=["lib"], optional = true } @@ -54,9 +55,10 @@ models = [ "ipnet", "itertools", "bitflags", + "thiserror", + "smallvec", ] parser = [ - "bytes", "chrono", "env_logger", "log", @@ -80,16 +82,25 @@ rislive = [ serde = [ "dep:serde", "ipnet/serde", + "smallvec/serde", ] [[bench]] name = "internals" harness = false +[[bench]] +name = "mrt_type" +harness = false + [[bench]] name = "bench_main" harness = false +# Enable debug symbols for benchmarks for easier profiling +[profile.bench] +debug = true + [dev-dependencies] anyhow = "1" bgpkit-broker = "0.6.2" diff --git a/benches/mrt_type.rs b/benches/mrt_type.rs new file mode 100644 index 0000000..98601df --- /dev/null +++ b/benches/mrt_type.rs @@ -0,0 +1,114 @@ +use bgpkit_parser::models::EntryType; +use bgpkit_parser::mrt_record::parse_common_header; +use bgpkit_parser::BgpkitParser; +use bzip2::bufread::BzDecoder; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use flate2::bufread::GzDecoder; +use std::fs::File; +use std::io::{BufReader, Read, Seek, SeekFrom}; + +mod data_source; + +const RECORDS_PER_TYPE: usize = 100; + +/// Choose a mix of records with a given MRT type and subtype. The records are chosen to get a +/// uniform distribution of different length records. 
+fn select_mrt_records(mut input_reader: R, mrt_type: EntryType, subtype: u16) -> Vec { + let mut included = Vec::new(); + let mut buffer = Vec::with_capacity(4096); + + while let Ok(header) = parse_common_header(&mut input_reader) { + buffer.clear(); + header + .write_header(&mut buffer) + .expect("able to write header to vec"); + (&mut input_reader) + .take(header.length as u64) + .read_to_end(&mut buffer) + .expect("able to read message body"); + + if header.entry_type == mrt_type && header.entry_subtype == subtype { + included.push(std::mem::replace(&mut buffer, Vec::with_capacity(4096))); + } + } + + included.sort_by_key(Vec::len); + + if included.is_empty() { + println!("No records found for MRT {:?} {:?}", mrt_type, subtype); + return Vec::new(); + } + + let mut record_output = Vec::new(); + for n in 0..RECORDS_PER_TYPE { + let index = (n * included.len()) / RECORDS_PER_TYPE; + record_output.extend_from_slice(&included[index][..]); + } + + record_output +} + +pub fn criterion_benchmark(c: &mut Criterion) { + let update_data = data_source::test_data_file("update-example.gz"); + let rib_data = data_source::test_data_file("rib-example-small.bz2"); + + let updates_reader = BufReader::new(File::open(update_data).unwrap()); + let mut rib_reader = BufReader::new(File::open(rib_data).unwrap()); + + println!("Decompressing input data and loading into memory..."); + let bgp4mp_updates = select_mrt_records(GzDecoder::new(updates_reader), EntryType::BGP4MP, 4); + let rib_ipv4_unicast = + select_mrt_records(BzDecoder::new(&mut rib_reader), EntryType::TABLE_DUMP_V2, 2); + rib_reader.seek(SeekFrom::Start(0)).unwrap(); + let rib_ipv6_unicast = + select_mrt_records(BzDecoder::new(&mut rib_reader), EntryType::TABLE_DUMP_V2, 4); + + c.bench_function("BGP4MP Update", |b| { + b.iter_with_large_drop(|| { + let mut reader = black_box(&bgp4mp_updates[..]); + let mut holder: [Option<_>; RECORDS_PER_TYPE] = std::array::from_fn(|_| None); + + BgpkitParser::from_reader(&mut reader) + 
.into_record_iter() + .enumerate() + .for_each(|(index, x)| holder[index] = Some(x)); + + holder + }) + }); + + c.bench_function("TABLE_DUMP_V2 IPv4 Unicast", |b| { + b.iter_with_large_drop(|| { + let mut reader = black_box(&rib_ipv4_unicast[..]); + let mut holder: [Option<_>; RECORDS_PER_TYPE] = std::array::from_fn(|_| None); + + BgpkitParser::from_reader(&mut reader) + .into_record_iter() + .enumerate() + .for_each(|(index, x)| holder[index] = Some(x)); + + holder + }) + }); + + c.bench_function("TABLE_DUMP_V2 IPv6 Unicast", |b| { + b.iter_with_large_drop(|| { + let mut reader = black_box(&rib_ipv6_unicast[..]); + let mut holder: [Option<_>; RECORDS_PER_TYPE] = std::array::from_fn(|_| None); + + BgpkitParser::from_reader(&mut reader) + .into_record_iter() + .enumerate() + .for_each(|(index, x)| holder[index] = Some(x)); + + holder + }) + }); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = criterion_benchmark +} +criterion_main!(benches); diff --git a/examples/cache_reading.rs b/examples/cache_reading.rs index 85c7023..a9bfe5e 100644 --- a/examples/cache_reading.rs +++ b/examples/cache_reading.rs @@ -15,7 +15,10 @@ fn main() { let parser = BgpkitParser::new_cached(item.url.as_str(), "/tmp/bgpkit-cache-example/").unwrap(); // iterating through the parser. the iterator returns `BgpElem` one at a time. 
- let elems = parser.into_elem_iter().collect::>(); + let elems = parser + .into_elem_iter() + .filter_map(Result::ok) + .collect::>(); log::info!("{} {} {}", item.collector_id, item.url, elems.len()); } } diff --git a/examples/deprecated_attributes.rs b/examples/deprecated_attributes.rs index c1d0e6d..04fbae1 100644 --- a/examples/deprecated_attributes.rs +++ b/examples/deprecated_attributes.rs @@ -9,6 +9,7 @@ fn main() { ) .unwrap() { + let elem = elem.unwrap(); if elem.deprecated.is_some() { println!( "{}", diff --git a/examples/display_elems.rs b/examples/display_elems.rs index 67106d1..7b9ee47 100644 --- a/examples/display_elems.rs +++ b/examples/display_elems.rs @@ -4,6 +4,7 @@ fn main() { let url = "http://archive.routeviews.org/bgpdata/\ 2021.10/UPDATES/updates.20211001.0000.bz2"; for elem in BgpkitParser::new(url).unwrap() { + let elem = elem.unwrap(); println!( "{:?}|{:?}|{:?}|{:?}|{:?}", elem.elem_type, elem.timestamp, elem.prefix, elem.as_path, elem.next_hop, diff --git a/examples/extended_communities.rs b/examples/extended_communities.rs index 11f0c79..d249264 100644 --- a/examples/extended_communities.rs +++ b/examples/extended_communities.rs @@ -16,6 +16,7 @@ fn main() { log::info!("parsing updates file"); // iterating through the parser. the iterator returns `BgpElem` one at a time. for elem in parser { + let elem = elem.unwrap(); if let Some(cs) = &elem.communities { for c in cs { match c { diff --git a/examples/filters.rs b/examples/filters.rs index 7c13623..eee3a72 100644 --- a/examples/filters.rs +++ b/examples/filters.rs @@ -1,3 +1,4 @@ +use bgpkit_parser::filter::Filter; use bgpkit_parser::BgpkitParser; /// This example shows how to parse a MRT file and filter by prefix. 
@@ -27,13 +28,12 @@ fn main() { "http://archive.routeviews.org/bgpdata/2021.10/UPDATES/updates.20211001.0000.bz2", ) .unwrap() - .add_filter("prefix", "211.98.251.0/24") - .unwrap(); + .add_filter(Filter::prefix("211.98.251.0/24").unwrap()); log::info!("parsing updates file"); // iterating through the parser. the iterator returns `BgpElem` one at a time. for elem in parser { - log::info!("{}", &elem); + log::info!("{}", elem.unwrap()); } log::info!("done"); } diff --git a/examples/find_as_set_messages.rs b/examples/find_as_set_messages.rs index 79cc732..611a808 100644 --- a/examples/find_as_set_messages.rs +++ b/examples/find_as_set_messages.rs @@ -19,6 +19,7 @@ fn main() { let collector = item.collector_id.clone(); let mut origins: HashSet = HashSet::new(); for elem in parser { + let elem = elem.unwrap(); if !elem.elem_type.is_announce() { continue; } diff --git a/examples/only-to-customer.rs b/examples/only-to-customer.rs index a2e1090..d11e0dd 100644 --- a/examples/only-to-customer.rs +++ b/examples/only-to-customer.rs @@ -6,6 +6,7 @@ fn main() { ) .unwrap() { + let elem = elem.unwrap(); if let Some(otc) = elem.only_to_customer { println!( "OTC found: {} for path {}\n{}\n", diff --git a/examples/parse-files-from-broker.rs b/examples/parse-files-from-broker.rs index d339851..297f3f5 100644 --- a/examples/parse-files-from-broker.rs +++ b/examples/parse-files-from-broker.rs @@ -19,6 +19,7 @@ fn main() { // iterating through the parser. the iterator returns `BgpElem` one at a time. let elems = parser .into_elem_iter() + .map(Result::unwrap) .filter_map(|elem| { if let Some(origins) = &elem.origin_asns { if origins.contains(&13335.into()) { diff --git a/examples/parse-single-file.rs b/examples/parse-single-file.rs index e4b6994..db7acc1 100644 --- a/examples/parse-single-file.rs +++ b/examples/parse-single-file.rs @@ -15,7 +15,7 @@ fn main() { log::info!("parsing updates file"); // iterating through the parser. the iterator returns `BgpElem` one at a time. 
for elem in parser { - log::info!("{}", &elem); + log::info!("{}", elem.unwrap()); } log::info!("done"); } diff --git a/examples/peer_index_table.rs b/examples/peer_index_table.rs index e6ae29d..a97ef3a 100644 --- a/examples/peer_index_table.rs +++ b/examples/peer_index_table.rs @@ -8,6 +8,6 @@ fn main() { let url = "https://data.ris.ripe.net/rrc03/2021.11/bview.20211128.1600.gz"; let parser = bgpkit_parser::BgpkitParser::new(url).unwrap(); for record in parser.into_record_iter().take(1) { - println!("{}", to_string_pretty(&json!(record)).unwrap()); + println!("{}", to_string_pretty(&json!(record.unwrap())).unwrap()); } } diff --git a/examples/real-time-routeviews-kafka-openbmp.rs b/examples/real-time-routeviews-kafka-openbmp.rs index 33d8c95..15eb435 100644 --- a/examples/real-time-routeviews-kafka-openbmp.rs +++ b/examples/real-time-routeviews-kafka-openbmp.rs @@ -3,7 +3,6 @@ extern crate core; use bgpkit_parser::parser::bmp::messages::MessageBody; use bgpkit_parser::Elementor; pub use bgpkit_parser::{parse_bmp_msg, parse_openbmp_header}; -use bytes::Bytes; use kafka::consumer::{Consumer, FetchOffset, GroupOffsetStorage}; use kafka::error::Error as KafkaError; use log::{error, info}; @@ -30,7 +29,7 @@ fn consume_and_print(group: String, topic: String, brokers: Vec) -> Resu for ms in mss.iter() { for m in ms.messages() { - let mut bytes = Bytes::from(m.value.to_vec()); + let mut bytes = m.value; let header = parse_openbmp_header(&mut bytes).unwrap(); let bmp_msg = parse_bmp_msg(&mut bytes); match bmp_msg { diff --git a/examples/records_iter.rs b/examples/records_iter.rs index e5f9322..efd1152 100644 --- a/examples/records_iter.rs +++ b/examples/records_iter.rs @@ -6,7 +6,7 @@ fn main() { let url = "http://archive.routeviews.org/route-views.amsix/bgpdata/2023.02/UPDATES/updates.20230222.0430.bz2"; let parser = BgpkitParser::new(url).unwrap(); for record in parser.into_record_iter() { - match record.message { + match record.unwrap().message { 
MrtMessage::TableDumpMessage(_) => {} MrtMessage::TableDumpV2Message(_) => {} MrtMessage::Bgp4Mp(msg) => match msg { diff --git a/src/bin/main.rs b/src/bin/main.rs index 4b416fd..3839d0f 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -1,10 +1,12 @@ -use itertools::Itertools; -use serde_json::json; -use std::io::Write; +use std::fmt::Display; +use std::io; +use std::io::{stdout, BufWriter, Write}; use std::net::IpAddr; use std::path::PathBuf; -use bgpkit_parser::{BgpkitParser, Elementor}; +use bgpkit_parser::filter::Filter; +use bgpkit_parser::models::ElemType; +use bgpkit_parser::{BgpkitParser, Elementor, PrefixMatchType}; use clap::Parser; use ipnet::IpNet; @@ -104,43 +106,39 @@ fn main() { }; if let Some(v) = opts.filters.as_path { - parser = parser.add_filter("as_path", v.as_str()).unwrap(); + parser = parser.add_filter(Filter::as_path(v.as_str()).unwrap()); } if let Some(v) = opts.filters.origin_asn { - parser = parser - .add_filter("origin_asn", v.to_string().as_str()) - .unwrap(); + parser = parser.add_filter(Filter::OriginAsn(v)); } if let Some(v) = opts.filters.prefix { let filter_type = match (opts.filters.include_super, opts.filters.include_sub) { - (false, false) => "prefix", - (true, false) => "prefix_super", - (false, true) => "prefix_sub", - (true, true) => "prefix_super_sub", + (false, false) => PrefixMatchType::Exact, + (true, false) => PrefixMatchType::IncludeSuper, + (false, true) => PrefixMatchType::IncludeSub, + (true, true) => PrefixMatchType::IncludeSuperSub, }; - parser = parser - .add_filter(filter_type, v.to_string().as_str()) - .unwrap(); + parser = parser.add_filter(Filter::Prefix(v, filter_type)); } if !opts.filters.peer_ip.is_empty() { - let v = opts.filters.peer_ip.iter().map(|p| p.to_string()).join(","); - parser = parser.add_filter("peer_ips", v.as_str()).unwrap(); + parser = parser.add_filter(Filter::PeerIps(opts.filters.peer_ip.to_owned())); } if let Some(v) = opts.filters.peer_asn { - parser = parser - 
.add_filter("peer_asn", v.to_string().as_str()) - .unwrap(); + parser = parser.add_filter(Filter::PeerAsn(v)); } if let Some(v) = opts.filters.elem_type { - parser = parser.add_filter("type", v.as_str()).unwrap(); + let filter_type = match v.as_str() { + "w" | "withdraw" | "withdrawal" => ElemType::WITHDRAW, + "a" | "announce" | "announcement" => ElemType::ANNOUNCE, + x => panic!("cannot parse elem type from {}", x), + }; + parser = parser.add_filter(Filter::Type(filter_type)); } if let Some(v) = opts.filters.start_ts { - parser = parser - .add_filter("start_ts", v.to_string().as_str()) - .unwrap(); + parser = parser.add_filter(Filter::TsStart(v)); } if let Some(v) = opts.filters.end_ts { - parser = parser.add_filter("end_ts", v.to_string().as_str()).unwrap(); + parser = parser.add_filter(Filter::TsEnd(v)); } match (opts.elems_count, opts.records_count) { @@ -148,8 +146,13 @@ fn main() { let mut elementor = Elementor::new(); let (mut records_count, mut elems_count) = (0, 0); for record in parser.into_record_iter() { - records_count += 1; - elems_count += elementor.record_to_elems(record).len(); + match record { + Ok(record) => { + records_count += 1; + elems_count += elementor.record_to_elems(record).len(); + } + Err(err) => handle_non_fatal_error(&mut stdout(), err), + } } println!("total records: {}", records_count); println!("total elems: {}", elems_count); @@ -158,28 +161,74 @@ fn main() { println!("total records: {}", parser.into_record_iter().count()); } (true, false) => { - println!("total records: {}", parser.into_elem_iter().count()); + println!("total elems: {}", parser.into_elem_iter().count()); } (false, false) => { - let mut stdout = std::io::stdout(); + let mut stdout = BufWriter::new(stdout().lock()); + for elem in parser { - let output_str = if opts.json { - let val = json!(elem); - if opts.pretty { - serde_json::to_string_pretty(&val).unwrap() - } else { - val.to_string() + match elem { + Ok(elem) => { + if opts.json { + let res = if opts.pretty {
+ serde_json::to_writer_pretty(&mut stdout, &elem) + } else { + serde_json::to_writer(&mut stdout, &elem) + }; + + handle_serde_json_result(&mut stdout, res); + // serde_json's writers do not append a newline, so terminate each record explicitly + let res = writeln!(stdout); + handle_io_result(&mut stdout, res); + } else { + let res = writeln!(stdout, "{}", elem); + handle_io_result(&mut stdout, res); + } } - } else { - elem.to_string() - }; - if let Err(e) = writeln!(stdout, "{}", &output_str) { - if e.kind() != std::io::ErrorKind::BrokenPipe { - eprintln!("{}", e); + Err(err) => { + let res = stdout.flush(); + handle_io_result(&mut stdout, res); + eprintln!("{}", err); } - std::process::exit(1); } } } } } + +fn handle_serde_json_result(stdout: &mut W, res: serde_json::Result<()>) { + if let Err(err) = res { + if err.is_io() { + // If it was an IO error, we likely won't be able to flush stdout + eprintln!("{}", err); + std::process::exit(1); + } + + handle_non_fatal_error(stdout, err); + } +} + +fn handle_non_fatal_error(stdout: &mut W, err: E) { + // Attempt to flush stdout before printing the error to avoid mangling combined CLI output + if let Err(flush_err) = stdout.flush() { + eprintln!("{}", err); + eprintln!("{}", flush_err); + std::process::exit(1); + } + + // Write the error to stderr then flush stderr to avoid mangling combined CLI output + eprintln!("{}", err); + if io::stderr().flush().is_err() { + // If this fails, then we are out of options for logging errors + std::process::exit(1); + } +} + +fn handle_io_result(stdout: &mut W, res: io::Result<()>) { + if let Err(err) = res { + // We can try flushing stdout, but it will almost certainly fail + let _ = stdout.flush(); + eprintln!("{}", err); + std::process::exit(1); + } +} diff --git a/src/error.rs b/src/error.rs index 6e6eacb..c80c33d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,122 +1,74 @@ /*! error module defines the error types used in bgpkit-parser.
*/ -use crate::models::{Afi, Bgp4MpType, BgpState, EntryType, Safi, TableDumpV2Type}; -use num_enum::TryFromPrimitiveError; -use oneio::OneIoError; -use std::fmt::{Display, Formatter}; -use std::io::ErrorKind; -use std::{error::Error, fmt, io}; +use crate::models::{AttrType, EntryType}; +use num_enum::{TryFromPrimitive, TryFromPrimitiveError}; +use std::io; +use thiserror::Error; -#[derive(Debug)] +#[derive(Debug, Error)] pub enum ParserError { - IoError(io::Error), - IoNotEnoughBytes(), - EofError(io::Error), - OneIoError(OneIoError), - EofExpected, - ParseError(String), - TruncatedMsg(String), - Unsupported(String), - FilterError(String), + /// This error represents a [num_enum::TryFromPrimitiveError] error for any of a number of + /// different types. + /// + /// ## Occurs during: + /// - Parsing of an MRT message body + #[error("unrecognized value {value} for {type_name}")] + UnrecognizedEnumVariant { type_name: &'static str, value: u64 }, + /// Indicates that the MRT message header type could not be determined while parsing a MRT + /// header. + /// + /// ## Occurs during: + /// - Parsing of an MRT message header + #[error("unrecognized type {0} in MRT header")] + UnrecognizedMrtType(u16), + /// This error represents a [ipnet::PrefixLenError] error. It occurs if an address mask is + /// larger than the length of the address it is being applied to. + /// + /// ## Occurs during: + /// - Reading network prefixes (parsing of an MRT message body) + #[error("invalid network prefix mask")] + InvalidPrefixLength(#[from] ipnet::PrefixLenError), + /// A general IO error triggered by the internal reader. 
+ /// + /// ## Occurs during: + /// - Reading of an MRT record header + /// - Buffering of an MRT record body before parsing + #[error(transparent)] + IoError(#[from] io::Error), + #[error("unable to parse unsupported MRT type {mrt_type:?} subtype {subtype}")] + UnsupportedMrtType { mrt_type: EntryType, subtype: u16 }, + #[error("unable to parse unsupported attribute type {0:?}")] + UnsupportedAttributeType(AttrType), + /// Indicates internal length inconsistencies within an MRT message. This includes fixed-length + /// and length-prefixed data requiring more or less space than is available within the enclosing + /// container. + #[error( + "encountered truncated value during {name}; expected {expected} bytes, but found {found}" + )] + InconsistentFieldLength { + name: &'static str, + expected: usize, + found: usize, + }, + #[error("invalid BGP message length {0} (expected 19 <= length <= 4096)")] + InvalidBgpMessageLength(u16), + #[error("invalid length {0} for MP_NEXT_HOP")] + InvalidNextHopLength(usize), + #[error("invalid length {0} for AGGREGATOR attribute (should be 6 or 8)")] + InvalidAggregatorAttrLength(usize), } -impl Error for ParserError {} - -#[derive(Debug)] -pub struct ParserErrorWithBytes { - pub error: ParserError, - pub bytes: Option>, -} - -impl Display for ParserErrorWithBytes { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.error) - } -} - -impl Error for ParserErrorWithBytes {} - -/// implement Display trait for Error which satistifies the std::error::Error -/// trait's requirement (must implement Display and Debug traits, Debug already derived) -impl Display for ParserError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - ParserError::IoError(e) => write!(f, "Error: {}", e), - ParserError::EofError(e) => write!(f, "Error: {}", e), - ParserError::ParseError(s) => write!(f, "Error: {}", s), - ParserError::TruncatedMsg(s) => write!(f, "Error: {}", s), - ParserError::Unsupported(s) => 
write!(f, "Error: {}", s), - ParserError::EofExpected => write!(f, "Error: reach end of file"), - ParserError::OneIoError(e) => write!(f, "Error: {}", e), - ParserError::FilterError(e) => write!(f, "Error: {}", e), - ParserError::IoNotEnoughBytes() => write!(f, "Error: Not enough bytes to read"), - } - } -} - -impl From for ParserErrorWithBytes { - fn from(error: OneIoError) -> Self { - ParserErrorWithBytes { - error: ParserError::OneIoError(error), - bytes: None, +impl From> for ParserError +where + T: TryFromPrimitive, + T::Primitive: Into, +{ + #[inline] + fn from(value: TryFromPrimitiveError) -> Self { + ParserError::UnrecognizedEnumVariant { + type_name: T::NAME, + value: value.number.into(), } } } - -impl From for ParserError { - fn from(error: OneIoError) -> Self { - ParserError::OneIoError(error) - } -} - -impl From for ParserErrorWithBytes { - fn from(error: ParserError) -> Self { - ParserErrorWithBytes { error, bytes: None } - } -} - -impl From for ParserError { - fn from(io_error: io::Error) -> Self { - match io_error.kind() { - ErrorKind::UnexpectedEof => ParserError::EofError(io_error), - _ => ParserError::IoError(io_error), - } - } -} - -impl From> for ParserError { - fn from(value: TryFromPrimitiveError) -> Self { - ParserError::ParseError(format!("cannot parse bgp4mp subtype: {}", value.number)) - } -} - -impl From> for ParserError { - fn from(value: TryFromPrimitiveError) -> Self { - ParserError::ParseError(format!("cannot parse bgp4mp state: {}", value.number)) - } -} - -impl From> for ParserError { - fn from(value: TryFromPrimitiveError) -> Self { - ParserError::ParseError(format!("cannot parse table dump v2 type: {}", value.number)) - } -} - -impl From> for ParserError { - fn from(value: TryFromPrimitiveError) -> Self { - ParserError::ParseError(format!("cannot parse entry type: {}", value.number)) - } -} - -impl From> for ParserError { - fn from(value: TryFromPrimitiveError) -> Self { - ParserError::ParseError(format!("Unknown AFI type: {}", 
value.number)) - } -} - -impl From> for ParserError { - fn from(value: TryFromPrimitiveError) -> Self { - ParserError::ParseError(format!("Unknown SAFI type: {}", value.number)) - } -} diff --git a/src/lib.rs b/src/lib.rs index 6d10ee6..856f3fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ Here is an example that does so. use bgpkit_parser::BgpkitParser; let parser = BgpkitParser::new("http://archive.routeviews.org/bgpdata/2021.10/UPDATES/updates.20211001.0000.bz2").unwrap(); for elem in parser { - println!("{}", elem) + println!("{}", elem.unwrap()) } ``` @@ -72,6 +72,7 @@ for item in broker.into_iter().take(2) { // iterating through the parser. the iterator returns `BgpElem` one at a time. let elems = parser .into_elem_iter() + .filter_map(Result::ok) .filter_map(|elem| { if let Some(origins) = &elem.origin_asns { if origins.contains(&13335.into()) { @@ -100,6 +101,7 @@ For all types of filters, check out the [Filter][filter] enum documentation. ```no_run use bgpkit_parser::BgpkitParser; +use bgpkit_parser::filter::Filter; /// This example shows how to parse a MRT file and filter by prefix. env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); @@ -108,12 +110,12 @@ log::info!("downloading updates file"); // create a parser that takes the buffered reader let parser = BgpkitParser::new("http://archive.routeviews.org/bgpdata/2021.10/UPDATES/updates.20211001.0000.bz2").unwrap() - .add_filter("prefix", "211.98.251.0/24").unwrap(); + .add_filter(Filter::prefix("211.98.251.0/24").unwrap()); log::info!("parsing updates file"); // iterating through the parser. the iterator returns `BgpElem` one at a time. 
for elem in parser { - log::info!("{}", &elem); + log::info!("{}", elem.unwrap()); } log::info!("done"); ``` diff --git a/src/models/bgp/aspath/builder.rs b/src/models/bgp/aspath/builder.rs new file mode 100644 index 0000000..6e965a0 --- /dev/null +++ b/src/models/bgp/aspath/builder.rs @@ -0,0 +1,166 @@ +use crate::models::aspath::storage::{AsPathStorage, SingleSequenceStorage}; +use crate::models::{AsPath, AsPathSegment, Asn}; +use smallvec::SmallVec; +use std::borrow::Cow; + +pub struct AsPathBuilder { + storage: AsPathStorage, + first_sequence: bool, +} + +impl AsPathBuilder { + #[inline(always)] + pub fn new() -> Self { + AsPathBuilder::default() + } + + /// Appends an ASN to the end of the last segment in the path. If there are no segments or the + /// last segment is not an `AS_PATH_AS_SEQUENCE`, a new segment will be added. + pub fn push_sequence_asn(&mut self, asn: Asn) { + self.first_sequence = false; + match &mut self.storage { + AsPathStorage::SingleSequence(seq) => seq.push(asn), + AsPathStorage::Mixed(segments) => { + if let Some(AsPathSegment::AsSequence(seq)) = segments.last_mut() { + seq.to_mut().push(asn); + // The ASN joined the existing trailing sequence, so no new segment is needed + return; + } + + segments.push(AsPathSegment::AsSequence(Cow::Owned(vec![asn]))); + } + } + } + + pub fn push_segment(&mut self, segment: AsPathSegment<'static>) { + let segments = self.storage.switch_to_mixed_storage(!self.first_sequence); + segments.push(segment); + } + + /// Begin a new AS sequence within this path being built. The given length is used similarly to + /// [Vec::with_capacity] to perform pre-allocation of the underlying storage.
+ #[inline(always)] + pub fn begin_as_sequence(&mut self, length: usize) -> AsPathSegmentBuilder { + let storage = &mut self.storage; + if self.first_sequence { + if let AsPathStorage::SingleSequence(seq) = storage { + self.first_sequence = false; + seq.reserve_exact(length); + return AsPathSegmentBuilder { + inner: AsPathSegmentBuilderInner::InPlace(seq), + }; + } + } + + Self::begin_sequence_cold_path(storage, length) + } + + /// Begin a new AS set within this path being built. The given length is used similarly to + /// [Vec::with_capacity] to perform pre-allocation of the underlying storage. + #[cold] + pub fn begin_as_set(&mut self, length: usize) -> AsPathSegmentBuilder { + let segments = self.storage.switch_to_mixed_storage(!self.first_sequence); + segments.push(AsPathSegment::AsSet(Cow::Owned(Vec::with_capacity(length)))); + + if let Some(AsPathSegment::AsSet(Cow::Owned(asns))) = segments.last_mut() { + AsPathSegmentBuilder { + inner: AsPathSegmentBuilderInner::Heap(asns), + } + } else { + unreachable!("Last segment will match the item pushed to the vec") + } + } + + /// Begin a new confed sequence within this path being built. The given length is used similarly to + /// [Vec::with_capacity] to perform pre-allocation of the underlying storage. + #[cold] + pub fn begin_confed_sequence(&mut self, length: usize) -> AsPathSegmentBuilder { + let segments = self.storage.switch_to_mixed_storage(!self.first_sequence); + segments.push(AsPathSegment::ConfedSequence(Cow::Owned( + Vec::with_capacity(length), + ))); + + if let Some(AsPathSegment::ConfedSequence(Cow::Owned(asns))) = segments.last_mut() { + AsPathSegmentBuilder { + inner: AsPathSegmentBuilderInner::Heap(asns), + } + } else { + unreachable!("Last segment will match the item pushed to the vec") + } + } + + /// Begin a new confed set within this path being built. The given length is used similarly to + /// [Vec::with_capacity] to perform pre-allocation of the underlying storage. 
+ #[cold] + pub fn begin_confed_set(&mut self, length: usize) -> AsPathSegmentBuilder { + let segments = self.storage.switch_to_mixed_storage(!self.first_sequence); + segments.push(AsPathSegment::ConfedSet(Cow::Owned(Vec::with_capacity( + length, + )))); + + if let Some(AsPathSegment::ConfedSet(Cow::Owned(asns))) = segments.last_mut() { + AsPathSegmentBuilder { + inner: AsPathSegmentBuilderInner::Heap(asns), + } + } else { + unreachable!("Last segment will match the item pushed to the vec") + } + } + + #[inline] + pub fn build(self) -> AsPath { + AsPath { + storage: self.storage, + } + } + + #[cold] + fn begin_sequence_cold_path( + storage: &mut AsPathStorage, + length: usize, + ) -> AsPathSegmentBuilder { + let segments = storage.switch_to_mixed_storage(true); + + segments.push(AsPathSegment::AsSequence(Cow::Owned(Vec::with_capacity( + length, + )))); + + if let Some(AsPathSegment::AsSequence(Cow::Owned(asns))) = segments.last_mut() { + AsPathSegmentBuilder { + inner: AsPathSegmentBuilderInner::Heap(asns), + } + } else { + unreachable!("Last segment will match the item pushed to the vec") + } + } +} + +impl Default for AsPathBuilder { + #[inline] + fn default() -> Self { + AsPathBuilder { + storage: AsPathStorage::SingleSequence(SmallVec::new()), + first_sequence: true, + } + } +} + +#[repr(transparent)] +pub struct AsPathSegmentBuilder<'a> { + inner: AsPathSegmentBuilderInner<'a>, +} + +enum AsPathSegmentBuilderInner<'a> { + InPlace(&'a mut SingleSequenceStorage), + Heap(&'a mut Vec), +} + +impl<'a> AsPathSegmentBuilder<'a> { + #[inline(always)] + pub fn push(&mut self, asn: Asn) { + match &mut self.inner { + AsPathSegmentBuilderInner::InPlace(arr) => arr.push(asn), + AsPathSegmentBuilderInner::Heap(arr) => arr.push(asn), + } + } +} diff --git a/src/models/bgp/aspath/iters.rs b/src/models/bgp/aspath/iters.rs new file mode 100644 index 0000000..3104508 --- /dev/null +++ b/src/models/bgp/aspath/iters.rs @@ -0,0 +1,265 @@ +use 
crate::models::aspath::storage::{AsPathStorage, MixedStorage}; +use crate::models::{AsPath, AsPathSegment, Asn}; +use smallvec::smallvec; +use std::borrow::Cow; +use std::iter::Copied; +use std::marker::PhantomData; +use std::slice; + +impl AsPathSegment<'_> { + /// Get an iterator over the ASNs within this path segment + pub fn iter(&self) -> <&'_ Self as IntoIterator>::IntoIter { + self.into_iter() + } + + /// Get a mutable iterator over the ASNs within this path segment + pub fn iter_mut(&mut self) -> <&'_ mut Self as IntoIterator>::IntoIter { + self.into_iter() + } +} + +pub enum MaybeOwnedIntoIter<'a, T: Copy> { + Borrow(Copied>), + Owned(std::vec::IntoIter), +} + +impl<'a, T: Copy> Iterator for MaybeOwnedIntoIter<'a, T> { + type Item = T; + + fn next(&mut self) -> Option { + match self { + MaybeOwnedIntoIter::Borrow(x) => x.next(), + MaybeOwnedIntoIter::Owned(x) => x.next(), + } + } +} + +impl<'a> IntoIterator for AsPathSegment<'a> { + type Item = Asn; + type IntoIter = MaybeOwnedIntoIter<'a, Asn>; + + fn into_iter(self) -> Self::IntoIter { + let (AsPathSegment::AsSequence(x) + | AsPathSegment::AsSet(x) + | AsPathSegment::ConfedSequence(x) + | AsPathSegment::ConfedSet(x)) = self; + + match x { + Cow::Borrowed(y) => MaybeOwnedIntoIter::Borrow(y.iter().copied()), + Cow::Owned(y) => MaybeOwnedIntoIter::Owned(y.into_iter()), + } + } +} + +impl<'a, 'b: 'a> IntoIterator for &'a AsPathSegment<'b> { + type Item = &'a Asn; + type IntoIter = std::slice::Iter<'a, Asn>; + + fn into_iter(self) -> Self::IntoIter { + let (AsPathSegment::AsSequence(x) + | AsPathSegment::AsSet(x) + | AsPathSegment::ConfedSequence(x) + | AsPathSegment::ConfedSet(x)) = self; + x.iter() + } +} + +impl<'a, 'b: 'a> IntoIterator for &'a mut AsPathSegment<'b> { + type Item = &'a mut Asn; + type IntoIter = std::slice::IterMut<'a, Asn>; + + fn into_iter(self) -> Self::IntoIter { + let (AsPathSegment::AsSequence(x) + | AsPathSegment::AsSet(x) + | AsPathSegment::ConfedSequence(x) + | 
AsPathSegment::ConfedSet(x)) = self; + x.to_mut().iter_mut() + } +} + +/// This is not a perfect solution since it is theoretically possible that a path could be created +/// with more variations than a u64. That being said, the chances of such a thing occurring are +/// essentially non-existent unless a BGP peer begins announcing maliciously constructed paths. +struct AsPathNumberedRouteIter<'a> { + path: &'a [AsPathSegment<'a>], + index: usize, + route_num: u64, +} + +impl<'a> Iterator for AsPathNumberedRouteIter<'a> { + type Item = Asn; + + fn next(&mut self) -> Option { + loop { + match self.path.first()? { + AsPathSegment::AsSequence(x) => match x.get(self.index) { + None => { + self.index = 0; + self.path = &self.path[1..]; + } + Some(asn) => { + self.index += 1; + return Some(*asn); + } + }, + AsPathSegment::AsSet(x) => { + self.path = &self.path[1..]; + if x.is_empty() { + return Some(Asn::RESERVED); + } + + let asn = x[(self.route_num % x.len() as u64) as usize]; + self.route_num /= x.len() as u64; + return Some(asn); + } + _ => self.path = &self.path[1..], + } + } + } +} + +pub struct AsPathRouteIter<'a, D> { + path: Cow<'a, AsPathStorage>, + route_num: u64, + total_routes: u64, + _phantom: PhantomData, +} + +impl<'a, D> Iterator for AsPathRouteIter<'a, D> +where + D: FromIterator, +{ + type Item = D; + + fn next(&mut self) -> Option { + if self.route_num >= self.total_routes { + return None; + } + + match self.path.as_ref() { + AsPathStorage::SingleSequence(x) => { + self.route_num += 1; + Some(D::from_iter(x.iter().copied())) + } + AsPathStorage::Mixed(path) => { + // Attempt to speed up what is by far the most common case (a path of a single sequence) + if self.route_num == 0 && path.len() == 1 { + if let AsPathSegment::AsSequence(sequence) = &path[0] { + let route = D::from_iter(sequence.iter().copied()); + self.route_num += 1; + return Some(route); + } + } + + let route_asn_iter = AsPathNumberedRouteIter { + path: path.as_ref(), + index: 0, + 
route_num: self.route_num, + }; + + self.route_num += 1; + Some(D::from_iter(route_asn_iter)) + } + } + } +} + +#[repr(transparent)] +pub struct SegmentIter<'a> { + inner: SegmentIterInner<'a>, +} + +enum SegmentIterInner<'a> { + Single(Option<&'a [Asn]>), + Mixed(slice::Iter<'a, AsPathSegment<'static>>), +} + +impl<'a> Iterator for SegmentIter<'a> { + type Item = AsPathSegment<'a>; + + fn next(&mut self) -> Option { + match &mut self.inner { + SegmentIterInner::Single(x) => { + x.take().map(Cow::Borrowed).map(AsPathSegment::AsSequence) + } + SegmentIterInner::Mixed(x) => x.next().map(|x| x.borrowed()), + } + } +} + +pub type SegmentIntoIter = ::IntoIter; + +impl AsPath { + pub fn iter_segments(&self) -> SegmentIter<'_> { + let inner = match &self.storage { + AsPathStorage::SingleSequence(x) => SegmentIterInner::Single(Some(x)), + AsPathStorage::Mixed(x) => SegmentIterInner::Mixed(x.iter()), + }; + + SegmentIter { inner } + } + + pub fn into_segments_iter(self) -> SegmentIntoIter { + match self.storage { + AsPathStorage::SingleSequence(asns) => { + let segment = AsPathSegment::AsSequence(Cow::Owned(asns.to_vec())); + smallvec![segment].into_iter() + } + AsPathStorage::Mixed(segments) => segments.into_iter(), + } + } + + /// Gets an iterator over all possible routes this path represents. + pub fn iter_routes(&self) -> AsPathRouteIter<'_, D> + where + D: FromIterator, + { + AsPathRouteIter { + path: Cow::Borrowed(&self.storage), + route_num: 0, + total_routes: self.num_route_variations(), + _phantom: PhantomData, + } + } + + /// Iterate through the originating ASNs of this path. This functionality is provided for + /// completeness, but in almost all cases this iterator should only contain a single element. 
+ /// Alternatively, [AsPath::get_singular_origin] can be used if only one origin is expected. + pub fn iter_origins(&self) -> impl '_ + Iterator { + let origin_slice = match &self.storage { + AsPathStorage::SingleSequence(v) => v.last().map(slice::from_ref).unwrap_or(&[]), + AsPathStorage::Mixed(segments) => match segments.last() { + Some(AsPathSegment::AsSequence(v)) => v.last().map(slice::from_ref).unwrap_or(&[]), + Some(AsPathSegment::AsSet(v)) => v.as_ref(), + _ => &[], + }, + }; + + origin_slice.iter().copied() + } +} + +/// Iterates over all route variations the given `AsPath` represents. +impl<'a> IntoIterator for &'a AsPath { + type Item = Vec; + type IntoIter = AsPathRouteIter<'a, Vec>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_routes() + } +} + +/// Iterates over all route variations the given `AsPath` represents. +impl IntoIterator for AsPath { + type Item = Vec; + type IntoIter = AsPathRouteIter<'static, Vec>; + + fn into_iter(self) -> Self::IntoIter { + AsPathRouteIter { + total_routes: self.num_route_variations(), + path: Cow::Owned(self.storage), + route_num: 0, + _phantom: PhantomData, + } + } +} diff --git a/src/models/bgp/aspath/mod.rs b/src/models/bgp/aspath/mod.rs new file mode 100644 index 0000000..1b5b892 --- /dev/null +++ b/src/models/bgp/aspath/mod.rs @@ -0,0 +1,577 @@ +use crate::models::*; +use itertools::Itertools; +use std::borrow::Cow; +use std::fmt::{Display, Formatter}; +use std::hash::{Hash, Hasher}; +use std::mem::discriminant; + +use crate::models::aspath::storage::{AsPathStorage, MixedStorage}; +use crate::models::builder::AsPathBuilder; +pub use iters::*; + +pub mod builder; +pub mod iters; + +mod storage; + +#[cfg(feature = "serde")] +mod serde_impl; + +#[cfg(test)] +mod tests; + +/// Enum of AS path segment. 
+#[derive(Debug, Clone)] +pub enum AsPathSegment<'a> { + AsSequence(Cow<'a, [Asn]>), + AsSet(Cow<'a, [Asn]>), + ConfedSequence(Cow<'a, [Asn]>), + ConfedSet(Cow<'a, [Asn]>), +} + +impl<'a> AsPathSegment<'a> { + pub fn borrowed(&self) -> AsPathSegment { + match self { + AsPathSegment::AsSequence(x) => AsPathSegment::AsSequence(Cow::Borrowed(&**x)), + AsPathSegment::AsSet(x) => AsPathSegment::AsSet(Cow::Borrowed(&**x)), + AsPathSegment::ConfedSequence(x) => AsPathSegment::ConfedSequence(Cow::Borrowed(&**x)), + AsPathSegment::ConfedSet(x) => AsPathSegment::ConfedSet(Cow::Borrowed(&**x)), + } + } + + pub fn to_static_owned(&self) -> AsPathSegment<'static> { + match self { + AsPathSegment::AsSequence(x) => AsPathSegment::AsSequence(Cow::Owned(x.to_vec())), + AsPathSegment::AsSet(x) => AsPathSegment::AsSet(Cow::Owned(x.to_vec())), + AsPathSegment::ConfedSequence(x) => { + AsPathSegment::ConfedSequence(Cow::Owned(x.to_vec())) + } + AsPathSegment::ConfedSet(x) => AsPathSegment::ConfedSet(Cow::Owned(x.to_vec())), + } + } + + /// Shorthand for creating an `AsSequence` segment. + pub fn sequence>(seq: S) -> Self { + AsPathSegment::AsSequence(seq.as_ref().iter().copied().map_into().collect()) + } + + /// Shorthand for creating an `AsSet` segment. + pub fn set>(seq: S) -> Self { + AsPathSegment::AsSet(seq.as_ref().iter().copied().map_into().collect()) + } + + /// Get the number of ASNs this segment adds to the route. For the number of ASNs within the + /// segment use [AsPathSegment::len] instead. + pub fn route_len(&self) -> usize { + match self { + AsPathSegment::AsSequence(v) => v.len(), + AsPathSegment::AsSet(_) => 1, + AsPathSegment::ConfedSequence(_) | AsPathSegment::ConfedSet(_) => 0, + } + } + + /// Get the total number of ASNs within this segment. For the number of ASNs this segment adds to + /// a packet's route, use [AsPathSegment::route_len] instead. + pub fn len(&self) -> usize { + self.as_ref().len() + } + + /// Returns true if this segment has a length of 0. 
+ pub fn is_empty(&self) -> bool { + self.as_ref().is_empty() + } + + /// Gets if a segment represents the local members of an autonomous system confederation. + /// Shorthand for `matches!(x, AsPathSegment::ConfedSequence(_) | AsPathSegment::ConfedSet(_))`. + /// + /// + pub fn is_confed(&self) -> bool { + matches!( + self, + AsPathSegment::ConfedSequence(_) | AsPathSegment::ConfedSet(_) + ) + } + + /// Merge two [AsPathSegment]s in place and return if the merge was successful. + /// + /// See [AsPath::coalesce] for more information. + fn merge_in_place(&mut self, other: &mut Self) -> bool { + use AsPathSegment::*; + + match (self, other) { + (AsSequence(x), AsSequence(y)) | (ConfedSequence(x), ConfedSequence(y)) => { + x.to_mut().extend_from_slice(y); + true + } + (x @ (AsSequence(_) | ConfedSequence(_)), y) if x.is_empty() => { + std::mem::swap(x, y); + true + } + (_, AsSequence(y) | ConfedSequence(y)) if y.is_empty() => true, + _ => false, + } + } + + /// A much more aggressive version of [AsPathSegment::merge_in_place] which de-duplicates and + /// converts sets with only 1 ASN to sequences. + /// + /// See [AsPath::dedup_coalesce] for more information. + fn dedup_merge_in_place(&mut self, other: &mut Self) -> bool { + use AsPathSegment::*; + + other.dedup(); + match (self, other) { + (AsSequence(x), AsSequence(y)) | (ConfedSequence(x), ConfedSequence(y)) => { + let x_mut = x.to_mut(); + x_mut.extend_from_slice(y); + x_mut.dedup(); + true + } + (x @ (AsSequence(_) | ConfedSequence(_)), y) if x.is_empty() => { + std::mem::swap(x, y); + true + } + (_, AsSequence(y) | ConfedSequence(y)) if y.is_empty() => true, + _ => false, + } + } + + /// Deduplicate ASNs in this path segment. Additionally, sets are sorted and may be converted to + /// sequences if they only have a single element. + /// + /// See [AsPath::dedup_coalesce] for more information. 
+ fn dedup(&mut self) { + match self { + AsPathSegment::AsSequence(x) | AsPathSegment::ConfedSequence(x) => x.to_mut().dedup(), + AsPathSegment::AsSet(x) => { + let x_mut = x.to_mut(); + x_mut.sort_unstable(); + x_mut.dedup(); + if x.len() == 1 { + *self = AsPathSegment::AsSequence(std::mem::take(x)); + } + } + AsPathSegment::ConfedSet(x) => { + let x_mut = x.to_mut(); + x_mut.sort_unstable(); + x_mut.dedup(); + if x.len() == 1 { + *self = AsPathSegment::ConfedSequence(std::mem::take(x)); + } + } + } + } +} + +impl AsRef<[Asn]> for AsPathSegment<'_> { + fn as_ref(&self) -> &[Asn] { + let (AsPathSegment::AsSequence(x) + | AsPathSegment::AsSet(x) + | AsPathSegment::ConfedSequence(x) + | AsPathSegment::ConfedSet(x)) = self; + x + } +} + +impl Hash for AsPathSegment<'_> { + fn hash(&self, state: &mut H) { + // Hash the discriminant since we do not differentiate between confederation segments + discriminant(self).hash(state); + + let set = match self { + AsPathSegment::AsSequence(x) | AsPathSegment::ConfedSequence(x) => { + return x.hash(state); + } + AsPathSegment::AsSet(x) | AsPathSegment::ConfedSet(x) => x, + }; + + // FIXME: Once is_sorted is stabilized, call it first to determine if sorting is required + if set.len() <= 32 { + let mut buffer = [Asn::new_32bit(0); 32]; + set.iter() + .zip(&mut buffer) + .for_each(|(asn, buffer)| *buffer = *asn); + + let slice = &mut buffer[..set.len()]; + slice.sort_unstable(); + Asn::hash_slice(slice, state); + return; + } + + // Fallback to allocating a Vec on the heap to sort + set.iter().sorted().for_each(|x| x.hash(state)); + } +} + +/// Check for equality of two path segments. 
+/// ```rust +/// # use bgpkit_parser::models::AsPathSegment; +/// let a = AsPathSegment::sequence([1, 2, 3]); +/// let b = AsPathSegment::set([1, 2, 3]); +/// +/// // Sequences must be identical to be considered equivalent +/// assert_eq!(a, AsPathSegment::sequence([1, 2, 3])); +/// assert_ne!(a, AsPathSegment::sequence([1, 2, 3, 3])); +/// +/// // Sets may be reordered, but must contain exactly the same ASNs. +/// assert_eq!(b, AsPathSegment::set([3, 1, 2])); +/// assert_ne!(b, AsPathSegment::set([1, 2, 3, 3])); +/// ``` +impl PartialEq for AsPathSegment<'_> { + fn eq(&self, other: &Self) -> bool { + let (x, y) = match (self, other) { + (AsPathSegment::AsSequence(x), AsPathSegment::AsSequence(y)) + | (AsPathSegment::ConfedSequence(x), AsPathSegment::ConfedSequence(y)) => { + return x == y; + } + (AsPathSegment::AsSet(x), AsPathSegment::AsSet(y)) + | (AsPathSegment::ConfedSet(x), AsPathSegment::ConfedSet(y)) => (x, y), + _ => return false, + }; + + // Attempt to exit early + if x.len() != y.len() { + return false; + } else if x == y { + return true; + } + + if x.len() <= 32 { + let mut x_buffer = [Asn::new_32bit(0); 32]; + let mut y_buffer = [Asn::new_32bit(0); 32]; + x.iter() + .zip(&mut x_buffer) + .for_each(|(asn, buffer)| *buffer = *asn); + y.iter() + .zip(&mut y_buffer) + .for_each(|(asn, buffer)| *buffer = *asn); + + x_buffer[..x.len()].sort_unstable(); + y_buffer[..y.len()].sort_unstable(); + return x_buffer[..x.len()] == y_buffer[..y.len()]; + } + + x.iter() + .sorted() + .zip(y.iter().sorted()) + .all(|(a, b)| a == b) + } +} + +impl Eq for AsPathSegment<'_> {} + +#[derive(Debug, PartialEq, Clone, Eq, Default, Hash)] +#[repr(transparent)] +pub struct AsPath { + storage: AsPathStorage, +} + +impl AsPath { + pub fn new() -> AsPath { + AsPath::default() + } + + #[inline] + pub fn builder() -> AsPathBuilder { + AsPathBuilder::default() + } + + /// Shorthand for creating an `AsPath` consisting of a single `AsSequence` segment. 
+ pub fn from_sequence>(seq: S) -> Self { + let storage = + AsPathStorage::SingleSequence(seq.as_ref().iter().copied().map_into().collect()); + AsPath { storage } + } + + pub fn from_segments(segments: I) -> AsPath + where + I: IntoIterator>, + { + Self::from_iter(segments) + } + + /// Adds a new segment to the end of the path. This will change the origin of the path. No + /// validation or merging the segment is performed during this step. + pub fn append_segment(&mut self, segment: AsPathSegment<'static>) { + self.storage.switch_to_mixed_storage(true).push(segment); + } + + /// Check if the path is empty. Note that a non-empty path may have a route length of 0 due to + /// empty segments or confederation segments. + #[inline] + pub fn is_empty(&self) -> bool { + self.storage.is_empty() + } + + /// Get the total length of the routes this path represents. For example, if this route + /// contained a sequence of 5 ASNs followed by a set of 3 ASNs, the total route length would be + /// 6. + /// + /// Confederation segments do not count towards the total route length. This means it is + /// possible to have a non-empty AsPath with a length of 0. + pub fn route_len(&self) -> usize { + self.iter_segments() + .map(|segment| segment.route_len()) + .sum() + } + + /// Get the number of segments that make up this path. For the number of ASNs in routes + /// represented by this path, use [AsPath::route_len]. + pub fn len(&self) -> usize { + self.storage.len() + } + + /// Get the total number of routes this path represents. This function assumes the total number + /// of route variations can be represented by a u64. + pub fn num_route_variations(&self) -> u64 { + let mut variations: u64 = 1; + + for segment in self.iter_segments() { + if let AsPathSegment::AsSet(x) = segment { + variations *= x.len() as u64; + } + } + + variations + } + + /// Checks if any segments of this [AsPath] contain the following ASN. 
+ pub fn contains_asn(&self, x: Asn) -> bool { + self.iter_segments().flatten().contains(&x) + } + + /// Coalesce this [AsPath] into the minimum number of segments required without changing the + /// values along the path. This can be helpful as some BGP servers will prepend additional + /// segments without coalescing sequences. For de-duplicating see [AsPath::dedup_coalesce]. + /// + /// Changes applied by this function: + /// - Merge adjacent AS_SEQUENCE segments + /// - Merge adjacent AS_CONFED_SEQUENCE segments + /// - Removing empty AS_SEQUENCE and AS_CONFED_SEQUENCE segments + /// + /// ```rust + /// # use bgpkit_parser::models::{AsPath, AsPathSegment}; + /// let mut a = AsPath::from_segments(vec![ + /// AsPathSegment::sequence([1, 2]), + /// AsPathSegment::sequence([]), + /// AsPathSegment::sequence([2]), + /// AsPathSegment::set([2]), + /// AsPathSegment::set([5, 3, 3, 2]), + /// ]); + /// + /// let expected = AsPath::from_segments(vec![ + /// AsPathSegment::sequence([1, 2, 2]), + /// AsPathSegment::set([2]), + /// AsPathSegment::set([5, 3, 3, 2]), + /// ]); + /// + /// a.coalesce(); + /// assert_eq!(a, expected); + /// ``` + /// If there is only one segment, no changes will occur. This function will not attempt to + /// deduplicate sequences or alter sets. + pub fn coalesce(&mut self) { + if let AsPathStorage::Mixed(segments) = &mut self.storage { + let mut end_index = 0; + let mut scan_index = 1; + + while scan_index < segments.len() { + let (a, b) = segments.split_at_mut(scan_index); + if !AsPathSegment::merge_in_place(&mut a[end_index], &mut b[0]) { + end_index += 1; + segments.swap(end_index, scan_index); + } + scan_index += 1; + } + + segments.truncate(end_index + 1); + } + } + + /// A more aggressive version of [AsPath::coalesce] which also de-duplicates ASNs within this + /// path and converts sets of a single ASN to sequences. 
Some BGP servers will prepend their own + /// ASN multiple times when announcing a path to artificially increase the route length and make + /// the route seem less desirable to peers. This function is best suited for use-cases which + /// only care about transitions between ASes along the path. + /// + /// Changes applied by this function: + /// - Merge adjacent AS_SEQUENCE segments + /// - Merge adjacent AS_CONFED_SEQUENCE segments + /// - Removing empty AS_SEQUENCE and AS_CONFED_SEQUENCE segments + /// - De-duplicate ASNs in AS_SEQUENCE and AS_CONFED_SEQUENCE segments + /// - Sort and de-duplicate ASNs in AS_SET and AS_CONFED_SET segments + /// - Convert AS_SET and AS_CONFED_SET segments with exactly 1 element to sequences + /// + /// ```rust + /// # use bgpkit_parser::models::{AsPath, AsPathSegment}; + /// let mut a = AsPath::from_segments(vec![ + /// AsPathSegment::sequence([1, 2]), + /// AsPathSegment::sequence([]), + /// AsPathSegment::sequence([2]), + /// AsPathSegment::set([2]), + /// AsPathSegment::set([5, 3, 3, 2]), + /// ]); + /// + /// let expected = AsPath::from_segments(vec![ + /// AsPathSegment::sequence([1, 2]), + /// AsPathSegment::set([2, 3, 5]), + /// ]); + /// + /// a.dedup_coalesce(); + /// assert_eq!(a, expected); + /// ``` + pub fn dedup_coalesce(&mut self) { + match &mut self.storage { + AsPathStorage::SingleSequence(x) => x.dedup(), + AsPathStorage::Mixed(segments) => { + if !segments.is_empty() { + AsPathSegment::dedup(&mut segments[0]); + } + let mut end_index = 0; + let mut scan_index = 1; + + while scan_index < segments.len() { + let (a, b) = segments.split_at_mut(scan_index); + if !AsPathSegment::dedup_merge_in_place(&mut a[end_index], &mut b[0]) { + end_index += 1; + segments.swap(end_index, scan_index); + } + scan_index += 1; + } + + segments.truncate(end_index + 1); + } + } + } + + /// Checks if two paths correspond to equivalent routes. 
Unlike `a == b`, this function will + /// ignore duplicate ASNs by comparing the coalesced versions of each path. + /// + /// This is equivalent to [AsPath::eq] after calling [AsPath::dedup_coalesce] on both paths. + pub fn has_equivalent_routing(&self, other: &Self) -> bool { + let mut a = self.to_owned(); + let mut b = other.to_owned(); + + a.dedup_coalesce(); + b.dedup_coalesce(); + + a == b + } + + /// Get the length of ASN required to store all of the ASNs within this path + pub fn required_asn_length(&self) -> AsnLength { + self.iter_segments() + .flatten() + .map(|x| x.required_len()) + .fold(AsnLength::Bits16, |a, b| match (a, b) { + (AsnLength::Bits16, AsnLength::Bits16) => AsnLength::Bits16, + _ => AsnLength::Bits32, + }) + } + + /// Construct AsPath from AS_PATH and AS4_PATH + /// + /// + /// + /// ```text + /// If the number of AS numbers in the AS_PATH attribute is less than the + /// number of AS numbers in the AS4_PATH attribute, then the AS4_PATH + /// attribute SHALL be ignored, and the AS_PATH attribute SHALL be taken + /// as the AS path information. + /// + /// If the number of AS numbers in the AS_PATH attribute is larger than + /// or equal to the number of AS numbers in the AS4_PATH attribute, then + /// the AS path information SHALL be constructed by taking as many AS + /// numbers and path segments as necessary from the leading part of the + /// AS_PATH attribute, and then prepending them to the AS4_PATH attribute + /// so that the AS path information has a number of AS numbers identical + /// to that of the AS_PATH attribute. Note that a valid + /// AS_CONFED_SEQUENCE or AS_CONFED_SET path segment SHALL be prepended + /// if it is either the leading path segment or is adjacent to a path + /// segment that is prepended. 
+ /// ``` + pub fn merge_aspath_as4path(aspath: &AsPath, as4path: &AsPath) -> Option { + if as4path.is_empty() || aspath.route_len() < as4path.route_len() { + return Some(aspath.clone()); + } + + let mut new_segs = MixedStorage::new(); + + for (seg, as4seg) in aspath.iter_segments().zip(as4path.iter_segments()) { + if let (AsPathSegment::AsSequence(seq), AsPathSegment::AsSequence(seq4)) = + (&seg, &as4seg) + { + let diff_len = seq.len() - seq4.len(); + let mut new_seq: Vec = vec![]; + new_seq.extend(seq.iter().copied().take(diff_len)); + new_seq.extend(seq4.iter().copied()); + new_segs.push(AsPathSegment::AsSequence(Cow::Owned(new_seq))); + } else { + new_segs.push(as4seg.to_static_owned()); + } + } + + Some(AsPath { + storage: AsPathStorage::Mixed(new_segs), + }) + } + + /// This function serves as an alternative to [AsPath::iter_origins] which attempts to make the + /// assumption that a path can only have exactly one origin. If a path does not have exactly 1 + /// origin (such as when empty or ending in a set), then `None` will be returned instead. 
+ pub fn get_singular_origin(&self) -> Option { + match &self.storage { + AsPathStorage::SingleSequence(x) => x.last().copied(), + AsPathStorage::Mixed(segments) => match segments.last() { + Some(AsPathSegment::AsSequence(v)) => v.last().copied(), + Some(AsPathSegment::AsSet(v)) if v.len() == 1 => Some(v[0]), + _ => None, + }, + } + } +} + +impl FromIterator> for AsPath { + fn from_iter>>(iter: T) -> Self { + AsPath { + storage: AsPathStorage::from_iter(iter), + } + } +} + +impl Display for AsPath { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + for (index, segment) in self.iter_segments().enumerate() { + if index != 0 { + write!(f, " ")?; + } + + match segment { + AsPathSegment::AsSequence(v) | AsPathSegment::ConfedSequence(v) => { + let mut asn_iter = v.iter(); + if let Some(first_element) = asn_iter.next() { + write!(f, "{}", first_element)?; + + for asn in asn_iter { + write!(f, " {}", asn)?; + } + } + } + AsPathSegment::AsSet(v) | AsPathSegment::ConfedSet(v) => { + write!(f, "{{")?; + let mut asn_iter = v.iter(); + if let Some(first_element) = asn_iter.next() { + write!(f, "{}", first_element)?; + + for asn in asn_iter { + write!(f, ",{}", asn)?; + } + } + write!(f, "}}")?; + } + } + } + + Ok(()) + } +} diff --git a/src/models/bgp/aspath/serde_impl.rs b/src/models/bgp/aspath/serde_impl.rs new file mode 100644 index 0000000..b2338d4 --- /dev/null +++ b/src/models/bgp/aspath/serde_impl.rs @@ -0,0 +1,226 @@ +use super::*; +use crate::models::builder::AsPathBuilder; +use serde::de::{SeqAccess, Visitor}; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Cow; + +/// Segment type names using names from RFC3065. 
+/// +/// +#[allow(non_camel_case_types)] +#[derive(Serialize, Deserialize)] +enum SegmentType { + AS_SET, + AS_SEQUENCE, + AS_CONFED_SEQUENCE, + AS_CONFED_SET, +} + +#[derive(Serialize, Deserialize)] +struct VerboseSegment<'s> { + ty: SegmentType, + values: Cow<'s, [Asn]>, +} + +impl Serialize for AsPathSegment<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let (ty, elements) = match self { + AsPathSegment::AsSequence(x) => (SegmentType::AS_SEQUENCE, x.as_ref()), + AsPathSegment::AsSet(x) => (SegmentType::AS_SET, x.as_ref()), + AsPathSegment::ConfedSequence(x) => (SegmentType::AS_CONFED_SEQUENCE, x.as_ref()), + AsPathSegment::ConfedSet(x) => (SegmentType::AS_CONFED_SET, x.as_ref()), + }; + + let verbose = VerboseSegment { + ty, + values: Cow::Borrowed(elements), + }; + + verbose.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for AsPathSegment<'_> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let verbose = VerboseSegment::deserialize(deserializer)?; + + let values = verbose.values; + match verbose.ty { + SegmentType::AS_SET => Ok(AsPathSegment::AsSet(values)), + SegmentType::AS_SEQUENCE => Ok(AsPathSegment::AsSequence(values)), + SegmentType::AS_CONFED_SEQUENCE => Ok(AsPathSegment::ConfedSequence(values)), + SegmentType::AS_CONFED_SET => Ok(AsPathSegment::ConfedSet(values)), + } + } +} + +/// Check if we can serialize an `AsPath` using the simplified format and get the number of +/// elements to do so. The ambiguities that could prevent us from doing so are confederation +/// segments and adjacent sequence segments. 
+fn simplified_format_len(storage: &AsPathStorage) -> Option { + match storage { + AsPathStorage::SingleSequence(x) => Some(x.len()), + AsPathStorage::Mixed(segments) => { + let mut elements = 0; + let mut prev_was_sequence = false; + for segment in segments { + match segment { + AsPathSegment::AsSequence(seq) if !prev_was_sequence => { + prev_was_sequence = true; + elements += seq.len(); + } + AsPathSegment::AsSet(_) => { + prev_was_sequence = false; + elements += 1; + } + _ => return None, + } + } + + Some(elements) + } + } +} + +/// # Serialization format +/// For the sake of readability and ease of use within other applications, there are verbose and +/// simplified variants for serialization. +/// +/// ## Simplified format +/// The simplified format is the default preferred serialization format. This format does not +/// cover confederation segments and involves a single list of ASNs within the path sequence. +/// For sets, a list of set members is used in place of an ASN. +/// ```rust +/// # use bgpkit_parser::models::{Asn, AsPath, AsPathSegment}; +/// +/// let a: AsPath = serde_json::from_str("[123, 942, 102]").unwrap(); +/// let b: AsPath = serde_json::from_str("[231, 432, [643, 836], 352]").unwrap(); +/// +/// assert_eq!(&a.iter_segments().collect::>(), &[ +/// AsPathSegment::sequence([123, 942, 102]) +/// ]); +/// assert_eq!(&b.iter_segments().collect::>(), &[ +/// AsPathSegment::sequence([231, 432]), +/// AsPathSegment::set([643, 836]), +/// AsPathSegment::sequence([352]) +/// ]); +/// ``` +/// +/// ## Verbose format +/// The verbose format serves as the fallback format for when the simplified format can not be +/// used due to ambiguity. This happens when confederation segments are present, or multiple +/// sequences occur back to back. In this format, segments are explicitly separated and labeled. 
+/// Segment types, denoted by the `ty` field, correspond to the names used within RFC3065 +/// (`AS_SET`, `AS_SEQUENCE`, `AS_CONFED_SEQUENCE`, `AS_CONFED_SET`). +/// ```rust +/// # use std::borrow::Cow; +/// use bgpkit_parser::models::{Asn, AsPath}; +/// # use bgpkit_parser::models::AsPathSegment::*; +/// +/// let a = r#"[ +/// { "ty": "AS_CONFED_SEQUENCE", "values": [123, 942] }, +/// { "ty": "AS_SEQUENCE", "values": [773] }, +/// { "ty": "AS_SEQUENCE", "values": [382, 293] } +/// ]"#; +/// +/// let parsed: AsPath = serde_json::from_str(a).unwrap(); +/// assert_eq!(&parsed.iter_segments().collect::>(), &[ +/// ConfedSequence(Cow::Owned(vec![Asn::from(123), Asn::from(942)])), +/// AsSequence(Cow::Owned(vec![Asn::from(773)])), +/// AsSequence(Cow::Owned(vec![Asn::from(382), Asn::from(293)])) +/// ]); +/// ``` +impl Serialize for AsPath { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + if let Some(num_elements) = simplified_format_len(&self.storage) { + // Serialize simplified format + let mut seq_serializer = serializer.serialize_seq(Some(num_elements))?; + + for segment in self.iter_segments() { + match segment { + AsPathSegment::AsSequence(elements) => { + elements + .iter() + .try_for_each(|x| seq_serializer.serialize_element(x))?; + } + AsPathSegment::AsSet(x) => seq_serializer.serialize_element(&*x)?, + _ => unreachable!("simplified_format_len checked for confed segments"), + } + } + return seq_serializer.end(); + } + + // Serialize verbose format + serializer.collect_seq(self.iter_segments()) + } +} + +struct AsPathVisitor; + +impl<'de> Visitor<'de> for AsPathVisitor { + type Value = AsPath; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("list of AS_PATH segments") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + // Technically, we can handle an input that mixes the simplified and verbose formats, + // but we do not want to document this behavior 
as it may change in future updates. + #[derive(Deserialize)] + #[serde(untagged)] + enum PathElement { + SequenceElement(Asn), + Set(Vec), + Verbose(AsPathSegment<'static>), + } + + let mut append_new_sequence = false; + let mut builder = AsPathBuilder::default(); + while let Some(element) = seq.next_element()? { + match element { + PathElement::SequenceElement(x) => { + if append_new_sequence { + // If the input is mixed between verbose and regular segments, this flag + // is used to prevent appending to a verbose sequence. + append_new_sequence = false; + builder.push_segment(AsPathSegment::AsSequence(Cow::Owned(Vec::new()))); + } + + builder.push_sequence_asn(x); + } + PathElement::Set(values) => { + builder.push_segment(AsPathSegment::AsSet(Cow::Owned(values))); + } + PathElement::Verbose(verbose) => { + append_new_sequence = true; + builder.push_segment(verbose); + } + } + } + + Ok(builder.build()) + } +} + +impl<'de> Deserialize<'de> for AsPath { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_seq(AsPathVisitor) + } +} diff --git a/src/models/bgp/aspath/storage.rs b/src/models/bgp/aspath/storage.rs new file mode 100644 index 0000000..d1b3642 --- /dev/null +++ b/src/models/bgp/aspath/storage.rs @@ -0,0 +1,97 @@ +use crate::models::{AsPathSegment, Asn}; +use smallvec::{smallvec, SmallVec}; +use std::borrow::Cow; +use std::hash::{Hash, Hasher}; +use std::mem::size_of; +use std::slice; + +const STORAGE_SIZE_LIMIT: usize = 64; + +pub type SingleSequenceStorage = SmallVec<[Asn; STORAGE_SIZE_LIMIT / size_of::()]>; +pub type MixedStorage = + SmallVec<[AsPathSegment<'static>; STORAGE_SIZE_LIMIT / size_of::()]>; + +#[derive(Debug, Clone, Eq)] +pub enum AsPathStorage { + /// By far the most common type of AS Path appearing in RIB data is a single sequence of between + /// 1 to ~20 ASNs. We can optimize for this use case by providing space in the structure for + /// those ASNs before allocating to the heap. 
After checking a couple of RIB table dumps, + /// roughly 75% of AS_PATHs consist of a single sequence with 5 ASNs. By expanding to 16, we + /// can then hold roughly 99.5% of observed AS_PATH attributes on the stack without allocation. + SingleSequence(SingleSequenceStorage), + /// Fallback case where we defer to the typical list of generic segments + Mixed(MixedStorage), +} + +impl Hash for AsPathStorage { + fn hash(&self, state: &mut H) { + match self { + AsPathStorage::SingleSequence(x) => { + let segment = AsPathSegment::AsSequence(Cow::Borrowed(x)); + AsPathSegment::hash_slice(slice::from_ref(&segment), state) + } + AsPathStorage::Mixed(segments) => AsPathSegment::hash_slice(segments, state), + } + } +} + +impl PartialEq for AsPathStorage { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (AsPathStorage::SingleSequence(x), AsPathStorage::SingleSequence(y)) => x == y, + (AsPathStorage::Mixed(x), AsPathStorage::Mixed(y)) => x == y, + (AsPathStorage::SingleSequence(x), AsPathStorage::Mixed(y)) + | (AsPathStorage::Mixed(y), AsPathStorage::SingleSequence(x)) => { + let segment = AsPathSegment::AsSequence(Cow::Borrowed(x)); + slice::from_ref(&segment) == &y[..] 
+ } + } + } +} + +impl Default for AsPathStorage { + fn default() -> Self { + AsPathStorage::SingleSequence(SmallVec::default()) + } +} + +impl FromIterator> for AsPathStorage { + fn from_iter>>(iter: T) -> Self { + AsPathStorage::Mixed(MixedStorage::from_iter(iter)) + } +} + +impl AsPathStorage { + /// Checks if there are any segments in this storage + #[inline] + pub fn is_empty(&self) -> bool { + match self { + // A single sequence still counts as 1 segment even if empty + AsPathStorage::SingleSequence(_) => false, + AsPathStorage::Mixed(segments) => segments.is_empty(), + } + } + + pub fn len(&self) -> usize { + match self { + AsPathStorage::SingleSequence(_) => 1, + AsPathStorage::Mixed(segments) => segments.len(), + } + } + + pub fn switch_to_mixed_storage(&mut self, preserve_single_sequence: bool) -> &mut MixedStorage { + loop { + match self { + AsPathStorage::SingleSequence(seq) => { + if preserve_single_sequence { + let segment = AsPathSegment::AsSequence(Cow::Owned(seq.to_vec())); + *self = AsPathStorage::Mixed(smallvec![segment]); + } else { + *self = AsPathStorage::Mixed(SmallVec::new()) + } + } + AsPathStorage::Mixed(segments) => return segments, + } + } + } +} diff --git a/src/models/bgp/aspath/tests.rs b/src/models/bgp/aspath/tests.rs new file mode 100644 index 0000000..67068e3 --- /dev/null +++ b/src/models/bgp/aspath/tests.rs @@ -0,0 +1,74 @@ +use crate::models::*; +use itertools::Itertools; +use std::collections::HashSet; + +#[test] +fn test_aspath_as4path_merge() { + let aspath = AsPath::from_sequence([1, 2, 3, 5]); + let as4path = AsPath::from_sequence([2, 3, 7]); + let newpath = AsPath::merge_aspath_as4path(&aspath, &as4path).unwrap(); + assert_eq!( + newpath.into_segments_iter().next().unwrap(), + AsPathSegment::sequence([1, 2, 3, 7]) + ); +} + +#[test] +fn test_get_origin() { + let aspath = AsPath::from_sequence([1, 2, 3, 5]); + let origins = aspath.get_singular_origin(); + assert_eq!(origins.unwrap(), Asn::from(5)); + + let aspath = 
AsPath::from_segments(vec![ + AsPathSegment::sequence([1, 2, 3, 5]), + AsPathSegment::set([7, 8]), + ]); + let origins = aspath.iter_origins().map_into::().collect::>(); + assert_eq!(origins, vec![7, 8]); +} + +#[test] +fn test_aspath_route_iter() { + let path = AsPath::from_segments(vec![ + AsPathSegment::set([3, 4]), + AsPathSegment::set([5, 6]), + AsPathSegment::sequence([7, 8]), + ]); + assert_eq!(path.route_len(), 4); + + let mut routes = HashSet::new(); + for route in &path { + assert!(routes.insert(route)); + } + + assert_eq!(routes.len(), 4); + assert!(routes.contains(&vec![ + Asn::from(3), + Asn::from(5), + Asn::from(7), + Asn::from(8) + ])); + assert!(routes.contains(&vec![ + Asn::from(3), + Asn::from(6), + Asn::from(7), + Asn::from(8) + ])); + assert!(routes.contains(&vec![ + Asn::from(4), + Asn::from(5), + Asn::from(7), + Asn::from(8) + ])); + assert!(routes.contains(&vec![ + Asn::from(4), + Asn::from(6), + Asn::from(7), + Asn::from(8) + ])); +} + +#[test] +fn check_hash() { + // TODO: Check that single-sequence storage and mixed storage holding one sequence segment hash the same. +} diff --git a/src/models/bgp/attributes/aspath.rs b/src/models/bgp/attributes/aspath.rs deleted file mode 100644 index 5e121bc..0000000 --- a/src/models/bgp/attributes/aspath.rs +++ /dev/null @@ -1,1014 +0,0 @@ -use crate::models::*; -use itertools::Itertools; -use std::borrow::Cow; -use std::fmt::{Display, Formatter}; -use std::hash::{Hash, Hasher}; -use std::iter::FromIterator; -use std::marker::PhantomData; -use std::mem::discriminant; - -/// Enum of AS path segment. -#[derive(Debug, Clone)] -pub enum AsPathSegment { - AsSequence(Vec), - AsSet(Vec), - ConfedSequence(Vec), - ConfedSet(Vec), -} - -impl AsPathSegment { - /// Shorthand for creating an `AsSequence` segment. - pub fn sequence>(seq: S) -> Self { - AsPathSegment::AsSequence(seq.as_ref().iter().copied().map_into().collect()) - } - - /// Shorthand for creating an `AsSet` segment. 
- pub fn set>(seq: S) -> Self { - AsPathSegment::AsSet(seq.as_ref().iter().copied().map_into().collect()) - } - - /// Get the number of ASNs this segment adds to the route. For the number of ASNs within the - /// segment use [AsPathSegment::len] instead. - pub fn route_len(&self) -> usize { - match self { - AsPathSegment::AsSequence(v) => v.len(), - AsPathSegment::AsSet(_) => 1, - AsPathSegment::ConfedSequence(_) | AsPathSegment::ConfedSet(_) => 0, - } - } - - /// Ge the total number of ASNs within this segment. For the number of ASNs this segment adds to - /// a packet's route, use [AsPathSegment::route_len] instead. - pub fn len(&self) -> usize { - self.as_ref().len() - } - - /// Returns true if this segment has a length of 0. - pub fn is_empty(&self) -> bool { - self.as_ref().is_empty() - } - - /// Get an iterator over the ASNs within this path segment - pub fn iter(&self) -> <&'_ Self as IntoIterator>::IntoIter { - self.into_iter() - } - - /// Get a mutable iterator over the ASNs within this path segment - pub fn iter_mut(&mut self) -> <&'_ mut Self as IntoIterator>::IntoIter { - self.into_iter() - } - - /// Gets if a segment represents the local members of an autonomous system confederation. - /// Shorthand for `matches!(x, AsPathSegment::ConfedSequence(_) | AsPathSegment::ConfedSet(_))`. - /// - /// - pub fn is_confed(&self) -> bool { - matches!( - self, - AsPathSegment::ConfedSequence(_) | AsPathSegment::ConfedSet(_) - ) - } - - /// Merge two [AsPathSegment]s in place and return if the merge was successful. - /// - /// See [AsPath::coalesce] for more information. 
- fn merge_in_place(&mut self, other: &mut Self) -> bool { - use AsPathSegment::*; - - match (self, other) { - (AsSequence(x), AsSequence(y)) | (ConfedSequence(x), ConfedSequence(y)) => { - x.extend_from_slice(y); - true - } - (x @ (AsSequence(_) | ConfedSequence(_)), y) if x.is_empty() => { - std::mem::swap(x, y); - true - } - (_, AsSequence(y) | ConfedSequence(y)) if y.is_empty() => true, - _ => false, - } - } - - /// A much more aggressive version of [AsPathSegment::merge_in_place] which de-duplicates and - /// converts sets with only 1 ASN to sequences. - /// - /// See [AsPath::dedup_coalesce] for more information. - fn dedup_merge_in_place(&mut self, other: &mut Self) -> bool { - use AsPathSegment::*; - - other.dedup(); - match (self, other) { - (AsSequence(x), AsSequence(y)) | (ConfedSequence(x), ConfedSequence(y)) => { - x.extend_from_slice(y); - x.dedup(); - true - } - (x @ (AsSequence(_) | ConfedSequence(_)), y) if x.is_empty() => { - std::mem::swap(x, y); - true - } - (_, AsSequence(y) | ConfedSequence(y)) if y.is_empty() => true, - _ => false, - } - } - - /// Deduplicate ASNs in this path segment. Additionally, sets are sorted and may be converted to - /// sequences if they only have a single element. - /// - /// See [AsPath::dedup_coalesce] for more information. 
- fn dedup(&mut self) { - match self { - AsPathSegment::AsSequence(x) | AsPathSegment::ConfedSequence(x) => x.dedup(), - AsPathSegment::AsSet(x) => { - x.sort_unstable(); - x.dedup(); - if x.len() == 1 { - *self = AsPathSegment::AsSequence(std::mem::take(x)); - } - } - AsPathSegment::ConfedSet(x) => { - x.sort_unstable(); - x.dedup(); - if x.len() == 1 { - *self = AsPathSegment::ConfedSequence(std::mem::take(x)); - } - } - } - } -} - -impl IntoIterator for AsPathSegment { - type Item = Asn; - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - let (AsPathSegment::AsSequence(x) - | AsPathSegment::AsSet(x) - | AsPathSegment::ConfedSequence(x) - | AsPathSegment::ConfedSet(x)) = self; - x.into_iter() - } -} - -impl<'a> IntoIterator for &'a AsPathSegment { - type Item = &'a Asn; - type IntoIter = std::slice::Iter<'a, Asn>; - - fn into_iter(self) -> Self::IntoIter { - let (AsPathSegment::AsSequence(x) - | AsPathSegment::AsSet(x) - | AsPathSegment::ConfedSequence(x) - | AsPathSegment::ConfedSet(x)) = self; - x.iter() - } -} - -impl<'a> IntoIterator for &'a mut AsPathSegment { - type Item = &'a mut Asn; - type IntoIter = std::slice::IterMut<'a, Asn>; - - fn into_iter(self) -> Self::IntoIter { - let (AsPathSegment::AsSequence(x) - | AsPathSegment::AsSet(x) - | AsPathSegment::ConfedSequence(x) - | AsPathSegment::ConfedSet(x)) = self; - x.iter_mut() - } -} - -impl AsRef<[Asn]> for AsPathSegment { - fn as_ref(&self) -> &[Asn] { - let (AsPathSegment::AsSequence(x) - | AsPathSegment::AsSet(x) - | AsPathSegment::ConfedSequence(x) - | AsPathSegment::ConfedSet(x)) = self; - x - } -} - -impl Hash for AsPathSegment { - fn hash(&self, state: &mut H) { - // Hash the discriminant since we do not differentiate between confederation segments - discriminant(self).hash(state); - - let set = match self { - AsPathSegment::AsSequence(x) | AsPathSegment::ConfedSequence(x) => { - return x.hash(state) - } - AsPathSegment::AsSet(x) | AsPathSegment::ConfedSet(x) => x, - 
}; - - // FIXME: Once is_sorted is stabilized, call it first to determine if sorting is required - if set.len() <= 32 { - let mut buffer = [Asn::new_32bit(0); 32]; - set.iter() - .zip(&mut buffer) - .for_each(|(asn, buffer)| *buffer = *asn); - - let slice = &mut buffer[..set.len()]; - slice.sort_unstable(); - Asn::hash_slice(slice, state); - return; - } - - // Fallback to allocating a Vec on the heap to sort - set.iter().sorted().for_each(|x| x.hash(state)); - } -} - -/// Check for equality of two path segments. -/// ```rust -/// # use bgpkit_parser::models::AsPathSegment; -/// let a = AsPathSegment::sequence([1, 2, 3]); -/// let b = AsPathSegment::set([1, 2, 3]); -/// -/// // Sequences must be identical to be considered equivalent -/// assert_eq!(a, AsPathSegment::sequence([1, 2, 3])); -/// assert_ne!(a, AsPathSegment::sequence([1, 2, 3, 3])); -/// -/// // Sets may be reordered, but must contain exactly the same ASNs. -/// assert_eq!(b, AsPathSegment::set([3, 1, 2])); -/// assert_ne!(b, AsPathSegment::set([1, 2, 3, 3])); -/// ``` -impl PartialEq for AsPathSegment { - fn eq(&self, other: &Self) -> bool { - let (x, y) = match (self, other) { - (AsPathSegment::AsSequence(x), AsPathSegment::AsSequence(y)) - | (AsPathSegment::ConfedSequence(x), AsPathSegment::ConfedSequence(y)) => { - return x == y - } - (AsPathSegment::AsSet(x), AsPathSegment::AsSet(y)) - | (AsPathSegment::ConfedSet(x), AsPathSegment::ConfedSet(y)) => (x, y), - _ => return false, - }; - - // Attempt to exit early - if x.len() != y.len() { - return false; - } else if x == y { - return true; - } - - if x.len() <= 32 { - let mut x_buffer = [Asn::new_32bit(0); 32]; - let mut y_buffer = [Asn::new_32bit(0); 32]; - x.iter() - .zip(&mut x_buffer) - .for_each(|(asn, buffer)| *buffer = *asn); - y.iter() - .zip(&mut y_buffer) - .for_each(|(asn, buffer)| *buffer = *asn); - - x_buffer[..x.len()].sort_unstable(); - y_buffer[..y.len()].sort_unstable(); - return x_buffer[..x.len()] == y_buffer[..y.len()]; - } - - 
x.iter() - .sorted() - .zip(y.iter().sorted()) - .all(|(a, b)| a == b) - } -} - -impl Eq for AsPathSegment {} - -/// This is not a perfect solution since it is theoretically possible that a path could be created -/// with more variations than a u64. That being said, the chances of such a thing occurring are -/// essentially non-existent unless a BGP peer begins announcing maliciously constructed paths. -struct AsPathNumberedRouteIter<'a> { - path: &'a [AsPathSegment], - index: usize, - route_num: u64, -} - -impl<'a> Iterator for AsPathNumberedRouteIter<'a> { - type Item = Asn; - - fn next(&mut self) -> Option { - loop { - match self.path.first()? { - AsPathSegment::AsSequence(x) => match x.get(self.index) { - None => { - self.index = 0; - self.path = &self.path[1..]; - } - Some(asn) => { - self.index += 1; - return Some(*asn); - } - }, - AsPathSegment::AsSet(x) => { - self.path = &self.path[1..]; - if x.is_empty() { - return Some(Asn::RESERVED); - } - - let asn = x[(self.route_num % x.len() as u64) as usize]; - self.route_num /= x.len() as u64; - return Some(asn); - } - _ => self.path = &self.path[1..], - } - } - } -} - -pub struct AsPathRouteIter<'a, D> { - path: Cow<'a, [AsPathSegment]>, - route_num: u64, - total_routes: u64, - _phantom: PhantomData, -} - -impl<'a, D> Iterator for AsPathRouteIter<'a, D> -where - D: FromIterator, -{ - type Item = D; - - fn next(&mut self) -> Option { - if self.route_num >= self.total_routes { - return None; - } - - // Attempt to speed up what is by far the most common case (a path of a single sequence) - if self.route_num == 0 && self.path.len() == 1 { - if let AsPathSegment::AsSequence(sequence) = &self.path[0] { - let route = D::from_iter(sequence.iter().copied()); - self.route_num += 1; - return Some(route); - } - } - - let route_asn_iter = AsPathNumberedRouteIter { - path: self.path.as_ref(), - index: 0, - route_num: self.route_num, - }; - - self.route_num += 1; - Some(D::from_iter(route_asn_iter)) - } -} - -#[derive(Debug, 
PartialEq, Clone, Eq, Default, Hash)] -pub struct AsPath { - pub segments: Vec, -} - -// Define iterator type aliases. The storage mechanism and by extension the iterator types may -// change later, but these types should remain consistent. -pub type SegmentIter<'a> = std::slice::Iter<'a, AsPathSegment>; -pub type SegmentIterMut<'a> = std::slice::IterMut<'a, AsPathSegment>; -pub type SegmentIntoIter = std::vec::IntoIter; - -impl AsPath { - pub fn new() -> AsPath { - AsPath { segments: vec![] } - } - - /// Shorthand for creating an `AsPath` consisting of a single `AsSequence` segment. - pub fn from_sequence>(seq: S) -> Self { - let segment = AsPathSegment::AsSequence(seq.as_ref().iter().copied().map_into().collect()); - - AsPath { - segments: vec![segment], - } - } - - pub fn from_segments(segments: Vec) -> AsPath { - AsPath { segments } - } - - /// Adds a new segment to the end of the path. This will change the origin of the path. No - /// validation or merging the segment is performed during this step. - pub fn append_segment(&mut self, segment: AsPathSegment) { - self.segments.push(segment); - } - - /// Check if the path is empty. Note that a non-empty path may have a route length of 0 due to - /// empty segments or confederation segments. - pub fn is_empty(&self) -> bool { - self.segments.is_empty() - } - - /// Get the total length of the routes this path represents. For example, if this route - /// contained a sequence of 5 ASNs followed by a set of 3 ASNs, the total route length would be - /// 6. - /// - /// Confederation segments do not count towards the total route length. This means it is - /// possible to have a non-empty AsPath with a length of 0. - pub fn route_len(&self) -> usize { - self.segments.iter().map(AsPathSegment::route_len).sum() - } - - /// Get the number of segments that make up this path. For the number of ASNs in routes - /// represented by this path, use [AsPath::route_len]. 
- pub fn len(&self) -> usize { - self.segments.len() - } - - /// Get the total number of routes this path represents. This function assumes the total number - /// of route variations can be represented by a u64. - pub fn num_route_variations(&self) -> u64 { - let mut variations: u64 = 1; - - for segment in &self.segments { - if let AsPathSegment::AsSet(x) = segment { - variations *= x.len() as u64; - } - } - - variations - } - - /// Checks if any segments of this [AsPath] contain the following ASN. - pub fn contains_asn(&self, x: Asn) -> bool { - self.iter_segments().flatten().contains(&x) - } - - /// Coalesce this [AsPath] into the minimum number of segments required without changing the - /// values along the path. This can be helpful as some BGP servers will prepend additional - /// segments without coalescing sequences. For de-duplicating see [AsPath::dedup_coalesce]. - /// - /// Changes applied by this function: - /// - Merge adjacent AS_SEQUENCE segments - /// - Merge adjacent AS_CONFED_SEQUENCE segments - /// - Removing empty AS_SEQUENCE and AS_CONFED_SEQUENCE segments - /// - /// ```rust - /// # use bgpkit_parser::models::{AsPath, AsPathSegment}; - /// let mut a = AsPath::from_segments(vec![ - /// AsPathSegment::sequence([1, 2]), - /// AsPathSegment::sequence([]), - /// AsPathSegment::sequence([2]), - /// AsPathSegment::set([2]), - /// AsPathSegment::set([5, 3, 3, 2]), - /// ]); - /// - /// let expected = AsPath::from_segments(vec![ - /// AsPathSegment::sequence([1, 2, 2]), - /// AsPathSegment::set([2]), - /// AsPathSegment::set([5, 3, 3, 2]), - /// ]); - /// - /// a.coalesce(); - /// assert_eq!(a, expected); - /// ``` - /// If there is only one segment, no changes will occur. This function will not attempt to - /// deduplicate sequences or alter sets. 
- pub fn coalesce(&mut self) { - let mut end_index = 0; - let mut scan_index = 1; - - while scan_index < self.segments.len() { - let (a, b) = self.segments.split_at_mut(scan_index); - if !AsPathSegment::merge_in_place(&mut a[end_index], &mut b[0]) { - end_index += 1; - self.segments.swap(end_index, scan_index); - } - scan_index += 1; - } - - self.segments.truncate(end_index + 1); - } - - /// A more aggressive version of [AsPath::coalesce] which also de-duplicates ASNs within this - /// path and converts sets of a single ASN to sequences. Some BGP servers will prepend their own - /// ASN multiple times when announcing a path to artificially increase the route length and make - /// the route seem less less desirable to peers.This function is best suited for use-cases which - /// only care about transitions between ASes along the path. - /// - /// Changes applied by this function: - /// - Merge adjacent AS_SEQUENCE segments - /// - Merge adjacent AS_CONFED_SEQUENCE segments - /// - Removing empty AS_SEQUENCE and AS_CONFED_SEQUENCE segments - /// - De-duplicate ASNs in AS_SEQUENCE and AS_CONFED_SEQUENCE segments - /// - Sort and de-duplicate ASNs in AS_SET and AS_CONFED_SET segments - /// - Convert AS_SET and AS_CONFED_SET segments with exactly 1 element to sequences - /// - /// ```rust - /// # use bgpkit_parser::models::{AsPath, AsPathSegment}; - /// let mut a = AsPath::from_segments(vec![ - /// AsPathSegment::sequence([1, 2]), - /// AsPathSegment::sequence([]), - /// AsPathSegment::sequence([2]), - /// AsPathSegment::set([2]), - /// AsPathSegment::set([5, 3, 3, 2]), - /// ]); - /// - /// let expected = AsPath::from_segments(vec![ - /// AsPathSegment::sequence([1, 2]), - /// AsPathSegment::set([2, 3, 5]), - /// ]); - /// - /// a.dedup_coalesce(); - /// assert_eq!(a, expected); - /// ``` - pub fn dedup_coalesce(&mut self) { - if !self.segments.is_empty() { - self.segments[0].dedup(); - } - let mut end_index = 0; - let mut scan_index = 1; - - while scan_index < 
self.segments.len() { - let (a, b) = self.segments.split_at_mut(scan_index); - if !AsPathSegment::dedup_merge_in_place(&mut a[end_index], &mut b[0]) { - end_index += 1; - self.segments.swap(end_index, scan_index); - } - scan_index += 1; - } - - self.segments.truncate(end_index + 1); - } - - /// Checks if two paths correspond to equivalent routes. Unlike `a == b`, this function will - /// ignore duplicate ASNs by comparing the coalesced versions of each path. - /// - /// This is equivalent to [AsPath::eq] after calling [AsPath::dedup_coalesce] on both paths. - pub fn has_equivalent_routing(&self, other: &Self) -> bool { - let mut a = self.to_owned(); - let mut b = other.to_owned(); - - a.dedup_coalesce(); - b.dedup_coalesce(); - - a == b - } - - /// Get the length of ASN required to store all of the ASNs within this path - pub fn required_asn_length(&self) -> AsnLength { - self.iter_segments().flatten().map(Asn::required_len).fold( - AsnLength::Bits16, - |a, b| match (a, b) { - (AsnLength::Bits16, AsnLength::Bits16) => AsnLength::Bits16, - _ => AsnLength::Bits32, - }, - ) - } - - pub fn iter_segments(&self) -> SegmentIter<'_> { - self.segments.iter() - } - - pub fn iter_segments_mut(&mut self) -> SegmentIterMut<'_> { - self.segments.iter_mut() - } - - pub fn into_segments_iter(self) -> SegmentIntoIter { - self.segments.into_iter() - } - - /// Gets an iterator over all possible routes this path represents. 
- pub fn iter_routes(&self) -> AsPathRouteIter<'_, D> - where - D: FromIterator, - { - AsPathRouteIter { - path: Cow::Borrowed(&self.segments), - route_num: 0, - total_routes: self.num_route_variations(), - _phantom: PhantomData, - } - } - - /// Construct AsPath from AS_PATH and AS4_PATH - /// - /// - /// - /// ```text - /// If the number of AS numbers in the AS_PATH attribute is less than the - /// number of AS numbers in the AS4_PATH attribute, then the AS4_PATH - /// attribute SHALL be ignored, and the AS_PATH attribute SHALL be taken - /// as the AS path information. - /// - /// If the number of AS numbers in the AS_PATH attribute is larger than - /// or equal to the number of AS numbers in the AS4_PATH attribute, then - /// the AS path information SHALL be constructed by taking as many AS - /// numbers and path segments as necessary from the leading part of the - /// AS_PATH attribute, and then prepending them to the AS4_PATH attribute - /// so that the AS path information has a number of AS numbers identical - /// to that of the AS_PATH attribute. Note that a valid - /// AS_CONFED_SEQUENCE or AS_CONFED_SET path segment SHALL be prepended - /// if it is either the leading path segment or is adjacent to a path - /// segment that is prepended. 
- /// ``` - pub fn merge_aspath_as4path(aspath: &AsPath, as4path: &AsPath) -> Option { - if aspath.route_len() < as4path.route_len() { - return Some(aspath.clone()); - } - - let mut as4iter = as4path.segments.iter(); - let mut as4seg = as4iter.next(); - let mut new_segs: Vec = vec![]; - if as4seg.is_none() { - new_segs.extend(aspath.segments.clone()); - return Some(AsPath { segments: new_segs }); - } - - for seg in &aspath.segments { - let as4seg_unwrapped = as4seg.unwrap(); - if let (AsPathSegment::AsSequence(seq), AsPathSegment::AsSequence(seq4)) = - (seg, as4seg_unwrapped) - { - let diff_len = seq.len() - seq4.len(); - let mut new_seq: Vec = vec![]; - new_seq.extend(seq.iter().take(diff_len)); - new_seq.extend(seq4); - new_segs.push(AsPathSegment::AsSequence(new_seq)); - } else { - new_segs.push(as4seg_unwrapped.clone()); - } - as4seg = as4iter.next(); - } - - Some(AsPath { segments: new_segs }) - } - - /// Iterate through the originating ASNs of this path. This functionality is provided for - /// completeness, but in almost all cases this iterator should only contain a single element. - /// Alternatively, [AsPath::get_singular_origin] can be used if - pub fn iter_origins(&self) -> impl '_ + Iterator { - let origin_slice = match self.segments.last() { - Some(AsPathSegment::AsSequence(v)) => v.last().map(std::slice::from_ref).unwrap_or(&[]), - Some(AsPathSegment::AsSet(v)) => v.as_ref(), - _ => &[], - }; - - origin_slice.iter().copied() - } - - /// This function serves as a alternative to [AsPath::iter_origins] which attempts to make the - /// assumption that a path can only have exactly one origin. If a path does not have exactly 1 - /// origin (such as when empty or ending in a set), then `None` will be returned instead. 
- pub fn get_singular_origin(&self) -> Option { - match self.segments.last() { - Some(AsPathSegment::AsSequence(v)) => v.last().copied(), - Some(AsPathSegment::AsSet(v)) if v.len() == 1 => Some(v[0]), - _ => None, - } - } -} - -/// Iterates over all route variations the given `AsPath` represents. -impl<'a> IntoIterator for &'a AsPath { - type Item = Vec; - type IntoIter = AsPathRouteIter<'a, Vec>; - - fn into_iter(self) -> Self::IntoIter { - self.iter_routes() - } -} - -/// Iterates over all route variations the given `AsPath` represents. -impl IntoIterator for AsPath { - type Item = Vec; - type IntoIter = AsPathRouteIter<'static, Vec>; - - fn into_iter(self) -> Self::IntoIter { - AsPathRouteIter { - total_routes: self.num_route_variations(), - path: Cow::Owned(self.segments), - route_num: 0, - _phantom: PhantomData, - } - } -} - -impl Display for AsPath { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - for (index, segment) in self.iter_segments().enumerate() { - if index != 0 { - write!(f, " ")?; - } - - match segment { - AsPathSegment::AsSequence(v) | AsPathSegment::ConfedSequence(v) => { - let mut asn_iter = v.iter(); - if let Some(first_element) = asn_iter.next() { - write!(f, "{}", first_element)?; - - for asn in asn_iter { - write!(f, " {}", asn)?; - } - } - } - AsPathSegment::AsSet(v) | AsPathSegment::ConfedSet(v) => { - write!(f, "{{")?; - let mut asn_iter = v.iter(); - if let Some(first_element) = asn_iter.next() { - write!(f, "{}", first_element)?; - - for asn in asn_iter { - write!(f, ",{}", asn)?; - } - } - write!(f, "}}")?; - } - } - } - - Ok(()) - } -} - -#[cfg(feature = "serde")] -mod serde_impl { - use super::*; - use serde::de::{SeqAccess, Visitor}; - use serde::ser::SerializeSeq; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - use std::borrow::Cow; - - /// Segment type names using names from RFC3065. 
- /// - /// - #[allow(non_camel_case_types)] - #[derive(Serialize, Deserialize)] - enum SegmentType { - AS_SET, - AS_SEQUENCE, - AS_CONFED_SEQUENCE, - AS_CONFED_SET, - } - - #[derive(Serialize, Deserialize)] - struct VerboseSegment<'s> { - ty: SegmentType, - values: Cow<'s, [Asn]>, - } - - impl Serialize for AsPathSegment { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let (ty, elements) = match self { - AsPathSegment::AsSequence(x) => (SegmentType::AS_SEQUENCE, x.as_ref()), - AsPathSegment::AsSet(x) => (SegmentType::AS_SET, x.as_ref()), - AsPathSegment::ConfedSequence(x) => (SegmentType::AS_CONFED_SEQUENCE, x.as_ref()), - AsPathSegment::ConfedSet(x) => (SegmentType::AS_CONFED_SET, x.as_ref()), - }; - - let verbose = VerboseSegment { - ty, - values: Cow::Borrowed(elements), - }; - - verbose.serialize(serializer) - } - } - - impl<'de> Deserialize<'de> for AsPathSegment { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let verbose = VerboseSegment::deserialize(deserializer)?; - - let values = verbose.values.into_owned(); - match verbose.ty { - SegmentType::AS_SET => Ok(AsPathSegment::AsSet(values)), - SegmentType::AS_SEQUENCE => Ok(AsPathSegment::AsSequence(values)), - SegmentType::AS_CONFED_SEQUENCE => Ok(AsPathSegment::ConfedSequence(values)), - SegmentType::AS_CONFED_SET => Ok(AsPathSegment::ConfedSet(values)), - } - } - } - - /// Check if we can serialize an `AsPath` using the simplified format and get the number of - /// elements to do so. The ambiguities that could prevent us from doing so are confederation - /// segments and adjacent sequence segments. 
- fn simplified_format_len(segments: &[AsPathSegment]) -> Option { - let mut elements = 0; - let mut prev_was_sequence = false; - for segment in segments { - match segment { - AsPathSegment::AsSequence(seq) if !prev_was_sequence => { - prev_was_sequence = true; - elements += seq.len(); - } - AsPathSegment::AsSet(_) => { - prev_was_sequence = false; - elements += 1; - } - _ => return None, - } - } - - Some(elements) - } - - /// # Serialization format - /// For the sake of readability and ease of use within other applications, there are verbose and - /// simplified variants for serialization. - /// - /// ## Simplified format - /// The simplified format is the default preferred serialization format. This format does not - /// cover confederation segments and involves a single list of ASNs within the path sequence. - /// For sets, a list of set members is used in place of an ASN. - /// ```rust - /// # use bgpkit_parser::models::{Asn, AsPath}; - /// # use bgpkit_parser::models::AsPathSegment::*; - /// - /// let a: AsPath = serde_json::from_str("[123, 942, 102]").unwrap(); - /// let b: AsPath = serde_json::from_str("[231, 432, [643, 836], 352]").unwrap(); - /// - /// assert_eq!(&a.segments, &[ - /// AsSequence(vec![Asn::from(123), Asn::from(942), Asn::from(102)]) - /// ]); - /// assert_eq!(&b.segments, &[ - /// AsSequence(vec![Asn::from(231), Asn::from(432)]), - /// AsSet(vec![Asn::from(643), Asn::from(836)]), - /// AsSequence(vec![Asn::from(352)]) - /// ]); - /// ``` - /// - /// ## Verbose format - /// The verbose format serves as the fallback format for when the simplified format can not be - /// used due to ambiguity. This happens when confederation segments are present, or multiple - /// sequences occur back to back. In this format, segments are explicitly seperated and labeled. - /// Segment types, denoted by the `ty` field, correspond to the names used within RFC3065 - /// (`AS_SET`, `AS_SEQUENCE`, `AS_CONFED_SEQUENCE`, `AS_CONFED_SET`). 
- /// ```rust - /// # use bgpkit_parser::models::{Asn, AsPath}; - /// # use bgpkit_parser::models::AsPathSegment::*; - /// - /// let a = r#"[ - /// { "ty": "AS_CONFED_SEQUENCE", "values": [123, 942] }, - /// { "ty": "AS_SEQUENCE", "values": [773] }, - /// { "ty": "AS_SEQUENCE", "values": [382, 293] } - /// ]"#; - /// - /// let parsed: AsPath = serde_json::from_str(a).unwrap(); - /// assert_eq!(&parsed.segments, &[ - /// ConfedSequence(vec![Asn::from(123), Asn::from(942)]), - /// AsSequence(vec![Asn::from(773)]), - /// AsSequence(vec![Asn::from(382), Asn::from(293)]) - /// ]); - /// ``` - impl Serialize for AsPath { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - if let Some(num_elements) = simplified_format_len(&self.segments) { - // Serialize simplified format - let mut seq_serializer = serializer.serialize_seq(Some(num_elements))?; - - for segment in &self.segments { - match segment { - AsPathSegment::AsSequence(elements) => { - elements - .iter() - .try_for_each(|x| seq_serializer.serialize_element(x))?; - } - AsPathSegment::AsSet(x) => seq_serializer.serialize_element(x)?, - _ => unreachable!("simplified_format_len checked for confed segments"), - } - } - return seq_serializer.end(); - } - - // Serialize verbose format - serializer.collect_seq(&self.segments) - } - } - - struct AsPathVisitor; - - impl<'de> Visitor<'de> for AsPathVisitor { - type Value = AsPath; - - fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { - formatter.write_str("list of AS_PATH segments") - } - - fn visit_seq(self, mut seq: A) -> Result - where - A: SeqAccess<'de>, - { - // Technically, we can handle an input that mixes the simplified and verbose formats, - // but we do not want to document this behavior as it may change in future updates. 
- #[derive(Deserialize)] - #[serde(untagged)] - enum PathElement { - SequenceElement(Asn), - Set(Vec), - Verbose(AsPathSegment), - } - - let mut append_new_sequence = false; - let mut segments = Vec::new(); - while let Some(element) = seq.next_element()? { - match element { - PathElement::SequenceElement(x) => { - if append_new_sequence { - // If the input is mixed between verbose and regular segments, this flag - // is used to prevent appending to a verbose sequence. - append_new_sequence = false; - segments.push(AsPathSegment::AsSequence(Vec::new())); - } - - if let Some(AsPathSegment::AsSequence(last_sequence)) = segments.last_mut() - { - last_sequence.push(x); - } else { - segments.push(AsPathSegment::AsSequence(vec![x])); - } - } - PathElement::Set(values) => { - segments.push(AsPathSegment::AsSet(values)); - } - PathElement::Verbose(verbose) => { - segments.push(verbose); - } - } - } - - Ok(AsPath { segments }) - } - } - - impl<'de> Deserialize<'de> for AsPath { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - deserializer.deserialize_seq(AsPathVisitor) - } - } -} - -#[cfg(test)] -mod tests { - use crate::models::*; - use itertools::Itertools; - use std::collections::HashSet; - - #[test] - fn test_aspath_as4path_merge() { - let aspath = AsPath::from_sequence([1, 2, 3, 5]); - let as4path = AsPath::from_sequence([2, 3, 7]); - let newpath = AsPath::merge_aspath_as4path(&aspath, &as4path).unwrap(); - assert_eq!(newpath.segments[0], AsPathSegment::sequence([1, 2, 3, 7])); - } - - #[test] - fn test_get_origin() { - let aspath = AsPath::from_sequence([1, 2, 3, 5]); - let origins = aspath.get_singular_origin(); - assert_eq!(origins.unwrap(), Asn::from(5)); - - let aspath = AsPath::from_segments(vec![ - AsPathSegment::sequence([1, 2, 3, 5]), - AsPathSegment::set([7, 8]), - ]); - let origins = aspath.iter_origins().map_into::().collect::>(); - assert_eq!(origins, vec![7, 8]); - } - - #[test] - fn test_aspath_route_iter() { - let path = 
AsPath::from_segments(vec![ - AsPathSegment::set([3, 4]), - AsPathSegment::set([5, 6]), - AsPathSegment::sequence([7, 8]), - ]); - assert_eq!(path.route_len(), 4); - - let mut routes = HashSet::new(); - for route in &path { - assert!(routes.insert(route)); - } - - assert_eq!(routes.len(), 4); - assert!(routes.contains(&vec![ - Asn::from(3), - Asn::from(5), - Asn::from(7), - Asn::from(8) - ])); - assert!(routes.contains(&vec![ - Asn::from(3), - Asn::from(6), - Asn::from(7), - Asn::from(8) - ])); - assert!(routes.contains(&vec![ - Asn::from(4), - Asn::from(5), - Asn::from(7), - Asn::from(8) - ])); - assert!(routes.contains(&vec![ - Asn::from(4), - Asn::from(6), - Asn::from(7), - Asn::from(8) - ])); - } -} diff --git a/src/models/bgp/attributes/mod.rs b/src/models/bgp/attributes/mod.rs index 831998f..5443889 100644 --- a/src/models/bgp/attributes/mod.rs +++ b/src/models/bgp/attributes/mod.rs @@ -1,13 +1,14 @@ //! BGP attribute structs -mod aspath; mod nlri; mod origin; use crate::models::network::*; use bitflags::bitflags; use num_enum::{FromPrimitive, IntoPrimitive}; +use smallvec::SmallVec; use std::cmp::Ordering; use std::iter::{FromIterator, Map}; +use std::mem::size_of; use std::net::IpAddr; use std::ops::Deref; use std::slice::Iter; @@ -15,7 +16,6 @@ use std::vec::IntoIter; use crate::models::*; -pub use aspath::*; pub use nlri::*; pub use origin::*; @@ -237,14 +237,14 @@ impl Attributes { }) } - pub fn get_reachable(&self) -> Option<&Nlri> { + pub fn get_reachable(&self) -> Option<&ReachableNlri> { self.inner.iter().find_map(|x| match &x.value { AttributeValue::MpReachNlri(x) => Some(x), _ => None, }) } - pub fn get_unreachable(&self) -> Option<&Nlri> { + pub fn get_unreachable(&self) -> Option<&UnreachableNlri> { self.inner.iter().find_map(|x| match &x.value { AttributeValue::MpUnreachNlri(x) => Some(x), _ => None, @@ -449,18 +449,28 @@ pub enum AttributeValue { id: BgpIdentifier, is_as4: bool, }, - Communities(Vec), - ExtendedCommunities(Vec), - 
LargeCommunities(Vec), + Communities(Communities), + ExtendedCommunities(ExtendedCommunities), + LargeCommunities(LargeCommunities), OriginatorId(BgpIdentifier), - Clusters(Vec), - MpReachNlri(Nlri), - MpUnreachNlri(Nlri), - Development(Vec), + Clusters(ClusterList), + MpReachNlri(ReachableNlri), + MpUnreachNlri(UnreachableNlri), + Development(Development), Deprecated(AttrRaw), Unknown(AttrRaw), } +const STORAGE_SIZE_LIMIT: usize = 64; + +pub type Communities = SmallVec<[Community; STORAGE_SIZE_LIMIT / size_of::()]>; +pub type ExtendedCommunities = + SmallVec<[ExtendedCommunity; STORAGE_SIZE_LIMIT / size_of::()]>; +pub type LargeCommunities = + SmallVec<[LargeCommunity; STORAGE_SIZE_LIMIT / size_of::()]>; +pub type ClusterList = SmallVec<[u32; STORAGE_SIZE_LIMIT / size_of::()]>; +pub type Development = SmallVec<[u8; STORAGE_SIZE_LIMIT]>; + impl From for AttributeValue { fn from(value: Origin) -> Self { AttributeValue::Origin(value) diff --git a/src/models/bgp/attributes/nlri.rs b/src/models/bgp/attributes/nlri.rs index b8dae10..2bd2b4e 100644 --- a/src/models/bgp/attributes/nlri.rs +++ b/src/models/bgp/attributes/nlri.rs @@ -1,23 +1,45 @@ +use crate::models::attributes::STORAGE_SIZE_LIMIT; use crate::models::*; use ipnet::IpNet; +use smallvec::SmallVec; use std::fmt::Debug; use std::iter::Map; +use std::mem::size_of; use std::net::IpAddr; -use std::ops::Deref; use std::slice::Iter; -use std::vec::IntoIter; -/// Network Layer Reachability Information +/// TODO: Create a PrefixListStorage with variants for IPv4/IPv6 with/without add path. 
+pub type PrefixList = SmallVec< + [NetworkPrefix; + (STORAGE_SIZE_LIMIT - size_of::<(Afi, Safi, NextHopAddress)>()) + / size_of::()], +>; + #[derive(Debug, PartialEq, Clone, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct Nlri { +pub struct ReachableNlri { pub afi: Afi, pub safi: Safi, - pub next_hop: Option, - pub prefixes: Vec, + pub next_hop: NextHopAddress, + pub prefixes: PrefixList, } -impl Nlri { +impl ReachableNlri { + #[inline] + pub const fn new( + afi: Afi, + safi: Safi, + next_hop: NextHopAddress, + prefixes: PrefixList, + ) -> ReachableNlri { + ReachableNlri { + afi, + safi, + next_hop, + prefixes, + } + } + /// Returns true if this NLRI refers to the IPv4 address space. pub const fn is_ipv4(&self) -> bool { matches!(self.afi, Afi::Ipv4) @@ -28,40 +50,44 @@ impl Nlri { matches!(self.afi, Afi::Ipv6) } - /// Returns true if this NLRI refers to reachable prefixes - pub const fn is_reachable(&self) -> bool { - self.next_hop.is_some() + pub const fn address_family(&self) -> Afi { + self.afi + } + + pub const fn safi(&self) -> Safi { + self.safi + } + + pub const fn next_hop(&self) -> NextHopAddress { + self.next_hop } /// Get the address of the next hop indicated by this NLRI. /// /// Panics if used on a unreachable NLRI message (ie. there is no next hop). 
pub const fn next_hop_addr(&self) -> IpAddr { - match self.next_hop { - Some(next_hop) => next_hop.addr(), - None => panic!("unreachable NLRI "), - } + self.next_hop.addr() } -} -impl Deref for Nlri { - type Target = Vec; + pub fn iter_with_path_id(&self) -> <&'_ PrefixList as IntoIterator>::IntoIter { + self.prefixes.iter() + } - fn deref(&self) -> &Self::Target { - &self.prefixes + pub fn into_iter_with_path_id(self) -> ::IntoIter { + self.prefixes.into_iter() } } -impl IntoIterator for Nlri { +impl IntoIterator for ReachableNlri { type Item = IpNet; - type IntoIter = Map, fn(NetworkPrefix) -> IpNet>; + type IntoIter = Map<::IntoIter, fn(NetworkPrefix) -> IpNet>; fn into_iter(self) -> Self::IntoIter { self.prefixes.into_iter().map(|x| x.prefix) } } -impl<'a> IntoIterator for &'a Nlri { +impl<'a> IntoIterator for &'a ReachableNlri { type Item = &'a IpNet; type IntoIter = Map, fn(&NetworkPrefix) -> &IpNet>; @@ -70,45 +96,65 @@ impl<'a> IntoIterator for &'a Nlri { } } -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct MpReachableNlri { - afi: Afi, - safi: Safi, - next_hop: NextHopAddress, - prefixes: Vec, +pub struct UnreachableNlri { + pub afi: Afi, + pub safi: Safi, + pub prefixes: PrefixList, } -impl MpReachableNlri { - pub fn new( - afi: Afi, - safi: Safi, - next_hop: NextHopAddress, - prefixes: Vec, - ) -> MpReachableNlri { - MpReachableNlri { +impl UnreachableNlri { + #[inline] + pub const fn new(afi: Afi, safi: Safi, prefixes: PrefixList) -> UnreachableNlri { + UnreachableNlri { afi, safi, - next_hop, prefixes, } } + + pub const fn address_family(&self) -> Afi { + self.afi + } + + pub const fn safi(&self) -> Safi { + self.safi + } + + /// Returns true if this NLRI refers to the IPv4 address space. + pub const fn is_ipv4(&self) -> bool { + matches!(self.afi, Afi::Ipv4) + } + + /// Returns true if this NLRI refers to the IPv6 address space. 
+ pub const fn is_ipv6(&self) -> bool { + matches!(self.afi, Afi::Ipv6) + } + + pub fn iter_with_path_id(&self) -> <&'_ PrefixList as IntoIterator>::IntoIter { + self.prefixes.iter() + } + + pub fn into_iter_with_path_id(self) -> ::IntoIter { + self.prefixes.into_iter() + } } -#[derive(Debug, PartialEq, Clone)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct MpUnreachableNlri { - afi: Afi, - safi: Safi, - prefixes: Vec, +impl IntoIterator for UnreachableNlri { + type Item = IpNet; + type IntoIter = Map<::IntoIter, fn(NetworkPrefix) -> IpNet>; + + fn into_iter(self) -> Self::IntoIter { + self.prefixes.into_iter().map(|x| x.prefix) + } } -impl MpUnreachableNlri { - pub fn new(afi: Afi, safi: Safi, prefixes: Vec) -> MpUnreachableNlri { - MpUnreachableNlri { - afi, - safi, - prefixes, - } +impl<'a> IntoIterator for &'a UnreachableNlri { + type Item = &'a IpNet; + type IntoIter = Map, fn(&NetworkPrefix) -> &IpNet>; + + fn into_iter(self) -> Self::IntoIter { + self.prefixes.iter().map(|x| &x.prefix) } } diff --git a/src/models/bgp/mod.rs b/src/models/bgp/mod.rs index 8c8484d..77b2221 100644 --- a/src/models/bgp/mod.rs +++ b/src/models/bgp/mod.rs @@ -1,5 +1,6 @@ //! BGP messages and relevant structs. 
+pub mod aspath; pub mod attributes; pub mod capabilities; pub mod community; @@ -7,6 +8,7 @@ pub mod elem; pub mod error; pub mod role; +pub use aspath::*; pub use attributes::*; pub use capabilities::*; pub use community::*; @@ -114,9 +116,9 @@ pub struct Capability { #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BgpUpdateMessage { - pub withdrawn_prefixes: Vec, + pub withdrawn_prefixes: PrefixList, pub attributes: Attributes, - pub announced_prefixes: Vec, + pub announced_prefixes: PrefixList, } #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/src/models/err.rs b/src/models/err.rs deleted file mode 100644 index 2a4bb49..0000000 --- a/src/models/err.rs +++ /dev/null @@ -1,26 +0,0 @@ -use ipnet::AddrParseError; -use std::error::Error; -use std::fmt::{Display, Formatter}; - -#[derive(Debug)] -pub enum BgpModelsError { - PrefixParsingError(String), -} - -impl Display for BgpModelsError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - BgpModelsError::PrefixParsingError(msg) => { - write!(f, "cannot convert str to IP prefix: {}", msg) - } - } - } -} - -impl Error for BgpModelsError {} - -impl From for BgpModelsError { - fn from(err: AddrParseError) -> Self { - BgpModelsError::PrefixParsingError(err.to_string()) - } -} diff --git a/src/models/mod.rs b/src/models/mod.rs index 628fc99..9159e99 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -52,11 +52,9 @@ RFCs. 
Here is a list of them: */ mod bgp; -mod err; mod mrt; mod network; pub use bgp::*; -pub use err::BgpModelsError; pub use mrt::*; pub use network::*; diff --git a/src/models/network/prefix.rs b/src/models/network/prefix.rs index 4e44ee3..d14182c 100644 --- a/src/models/network/prefix.rs +++ b/src/models/network/prefix.rs @@ -1,5 +1,4 @@ -use crate::models::BgpModelsError; -use ipnet::IpNet; +use ipnet::{AddrParseError, IpNet}; use std::fmt::{Debug, Display, Formatter}; use std::ops::Deref; use std::str::FromStr; @@ -31,7 +30,7 @@ impl Debug for NetworkPrefix { } impl FromStr for NetworkPrefix { - type Err = BgpModelsError; + type Err = AddrParseError; fn from_str(s: &str) -> Result { let prefix = IpNet::from_str(s)?; diff --git a/src/parser/bgp/attributes/attr_01_origin.rs b/src/parser/bgp/attributes/attr_01_origin.rs index 3afbc79..ab8d95f 100644 --- a/src/parser/bgp/attributes/attr_01_origin.rs +++ b/src/parser/bgp/attributes/attr_01_origin.rs @@ -1,16 +1,11 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::Bytes; use std::convert::TryFrom; -pub fn parse_origin(mut input: Bytes) -> Result { - match Origin::try_from(input.read_u8()?) 
{ - Ok(v) => Ok(AttributeValue::Origin(v)), - Err(_) => Err(ParserError::ParseError( - "Failed to parse attribute type: origin".to_string(), - )), - } +pub fn parse_origin(mut input: &[u8]) -> Result { + input.expect_remaining_eq(1, "ORIGIN")?; + Ok(AttributeValue::Origin(Origin::try_from(input.read_u8()?)?)) } #[cfg(test)] @@ -40,19 +35,19 @@ mod tests { fn test_parse_origin() { assert_eq!( AttributeValue::Origin(Origin::IGP), - parse_origin(Bytes::from_static(&[0u8])).unwrap() + parse_origin(&[0u8]).unwrap() ); assert_eq!( AttributeValue::Origin(Origin::EGP), - parse_origin(Bytes::from_static(&[1u8])).unwrap() + parse_origin(&[1u8]).unwrap() ); assert_eq!( AttributeValue::Origin(Origin::INCOMPLETE), - parse_origin(Bytes::from_static(&[2u8])).unwrap() + parse_origin(&[2u8]).unwrap() ); assert!(matches!( - parse_origin(Bytes::from_static(&[3u8])).unwrap_err(), - ParserError::ParseError(_) + parse_origin(&[3u8]).unwrap_err(), + ParserError::UnrecognizedEnumVariant { .. } )); } } diff --git a/src/parser/bgp/attributes/attr_02_17_as_path.rs b/src/parser/bgp/attributes/attr_02_17_as_path.rs index 3df804e..91edb27 100644 --- a/src/parser/bgp/attributes/attr_02_17_as_path.rs +++ b/src/parser/bgp/attributes/attr_02_17_as_path.rs @@ -1,49 +1,89 @@ +use crate::models::builder::AsPathBuilder; use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; - -const AS_PATH_AS_SET: u8 = 1; -const AS_PATH_AS_SEQUENCE: u8 = 2; -// https://datatracker.ietf.org/doc/html/rfc5065 -const AS_PATH_CONFED_SEQUENCE: u8 = 3; -const AS_PATH_CONFED_SET: u8 = 4; - -pub fn parse_as_path(mut input: Bytes, asn_len: &AsnLength) -> Result { - let mut output = AsPath { - segments: Vec::with_capacity(5), - }; - while input.remaining() > 0 { - let segment = parse_as_path_segment(&mut input, asn_len)?; - output.append_segment(segment); +use num_enum::TryFromPrimitive; + +#[allow(non_camel_case_types)] +#[derive(Debug, TryFromPrimitive)] +#[repr(u8)] +enum 
AsSegmentType { + AS_PATH_AS_SET = 1, + AS_PATH_AS_SEQUENCE = 2, + // https://datatracker.ietf.org/doc/html/rfc5065 + AS_PATH_CONFED_SEQUENCE = 3, + AS_PATH_CONFED_SET = 4, +} + +pub fn parse_as_path(input: &[u8], asn_len: AsnLength) -> Result { + match asn_len { + AsnLength::Bits16 => read_as_path_16bit(input), + AsnLength::Bits32 => read_as_path_32bit(input), + } +} + +fn read_as_path_16bit(mut input: &[u8]) -> Result { + let mut builder = AsPathBuilder::default(); + + while !input.is_empty() { + let segment_type = AsSegmentType::try_from(input.read_u8()?)?; + let count = input.read_u8()? as usize; + input.require_n_remaining(count * 2, "AS_PATH")?; + + let mut segment_builder = match segment_type { + AsSegmentType::AS_PATH_AS_SEQUENCE => builder.begin_as_sequence(count), + AsSegmentType::AS_PATH_AS_SET => builder.begin_as_set(count), + AsSegmentType::AS_PATH_CONFED_SEQUENCE => builder.begin_confed_sequence(count), + AsSegmentType::AS_PATH_CONFED_SET => builder.begin_confed_set(count), + }; + + for _ in 0..count { + segment_builder.push(Asn::new_16bit(input.read_u16()?)); + } } - Ok(output) + Ok(builder.build()) } -fn parse_as_path_segment( - input: &mut Bytes, - asn_len: &AsnLength, -) -> Result { - let segment_type = input.read_u8()?; - let count = input.read_u8()? as usize; - let path = input.read_asns(asn_len, count)?; - match segment_type { - AS_PATH_AS_SET => Ok(AsPathSegment::AsSet(path)), - AS_PATH_AS_SEQUENCE => Ok(AsPathSegment::AsSequence(path)), - AS_PATH_CONFED_SEQUENCE => Ok(AsPathSegment::ConfedSequence(path)), - AS_PATH_CONFED_SET => Ok(AsPathSegment::ConfedSet(path)), - _ => Err(ParserError::ParseError(format!( - "Invalid AS path segment type: {}", - segment_type - ))), +fn read_as_path_32bit(mut input: &[u8]) -> Result { + let mut builder = AsPathBuilder::default(); + + while !input.is_empty() { + let segment_type = AsSegmentType::try_from(input.read_u8()?)?; + let count = input.read_u8()? 
as usize; + input.require_n_remaining(count * 4, "AS4_PATH")?; + + let mut segment_builder = match segment_type { + AsSegmentType::AS_PATH_AS_SEQUENCE => builder.begin_as_sequence(count), + AsSegmentType::AS_PATH_AS_SET => builder.begin_as_set(count), + AsSegmentType::AS_PATH_CONFED_SEQUENCE => builder.begin_confed_sequence(count), + AsSegmentType::AS_PATH_CONFED_SET => builder.begin_confed_set(count), + }; + + for _ in 0..count { + segment_builder.push(Asn::new_32bit(input.read_u32()?)); + } } + + Ok(builder.build()) } #[cfg(test)] mod tests { use super::*; + fn parse_as_path_segment( + input: &mut &[u8], + asn_len: AsnLength, + ) -> Result, ParserError> { + let path = match asn_len { + AsnLength::Bits16 => read_as_path_16bit(input), + AsnLength::Bits32 => read_as_path_32bit(input), + }?; + + Ok(path.into_segments_iter().next().unwrap()) + } + /// /// ```text /// AS_PATH is a well-known mandatory attribute that is composed @@ -73,14 +113,14 @@ mod tests { /// ``` #[test] fn test_parse_as_path() { - let data = Bytes::from(vec![ + let data = &[ 2, // sequence 3, // 3 ASes in path 0, 1, // AS1 0, 2, // AS2 0, 3, // AS3 - ]); - let path = parse_as_path(data, &AsnLength::Bits16).unwrap(); + ]; + let path = parse_as_path(data, AsnLength::Bits16).unwrap(); assert_eq!(path, AsPath::from_sequence([1, 2, 3])); } @@ -89,62 +129,62 @@ mod tests { ////////////////////// // 16 bits sequence // ////////////////////// - let mut data = Bytes::from(vec![ + let mut data: &[u8] = &[ 2, // sequence 3, // 3 ASes in path 0, 1, // AS1 0, 2, // AS2 0, 3, // AS3 - ]); - let res = parse_as_path_segment(&mut data, &AsnLength::Bits16).unwrap(); + ]; + let res = parse_as_path_segment(&mut data, AsnLength::Bits16).unwrap(); assert_eq!(res, AsPathSegment::sequence([1, 2, 3])); ////////////////////// // 16 bits sequence // ////////////////////// - let mut data = Bytes::from(vec![ + let mut data: &[u8] = &[ 2, // sequence 3, // 3 ASes in path 0, 0, 0, 1, // AS1 0, 0, 0, 2, // AS2 0, 0, 0, 3, // 
AS3 - ]); - let res = parse_as_path_segment(&mut data, &AsnLength::Bits32).unwrap(); + ]; + let res = parse_as_path_segment(&mut data, AsnLength::Bits32).unwrap(); assert_eq!(res, AsPathSegment::sequence([1, 2, 3])); ///////////////// // other types // ///////////////// - let mut data = Bytes::from(vec![ + let mut data: &[u8] = &[ 1, // AS Set 1, // 1 AS in path 0, 1, - ]); - let res = parse_as_path_segment(&mut data, &AsnLength::Bits16).unwrap(); + ]; + let res = parse_as_path_segment(&mut data, AsnLength::Bits16).unwrap(); assert_eq!(res, AsPathSegment::set([1])); - let mut data = Bytes::from(vec![ + let mut data: &[u8] = &[ 3, // Confed Sequence 1, // 1 AS in path 0, 1, - ]); - let res = parse_as_path_segment(&mut data, &AsnLength::Bits16).unwrap(); + ]; + let res = parse_as_path_segment(&mut data, AsnLength::Bits16).unwrap(); assert!(matches!(res, AsPathSegment::ConfedSequence(_))); - let mut data = Bytes::from(vec![ + let mut data: &[u8] = &[ 4, // Confed Set 1, // 1 AS in path 0, 1, - ]); - let res = parse_as_path_segment(&mut data, &AsnLength::Bits16).unwrap(); + ]; + let res = parse_as_path_segment(&mut data, AsnLength::Bits16).unwrap(); assert!(matches!(res, AsPathSegment::ConfedSet(_))); - let mut data = Bytes::from(vec![ + let mut data: &[u8] = &[ 5, // ERROR 1, // 1 AS in path 0, 1, - ]); - let res = parse_as_path_segment(&mut data, &AsnLength::Bits16).unwrap_err(); - assert!(matches!(res, ParserError::ParseError(_))); + ]; + let res = parse_as_path_segment(&mut data, AsnLength::Bits16).unwrap_err(); + assert!(matches!(res, ParserError::UnrecognizedEnumVariant { .. 
})); } } diff --git a/src/parser/bgp/attributes/attr_03_next_hop.rs b/src/parser/bgp/attributes/attr_03_next_hop.rs index eaf385b..70d9c72 100644 --- a/src/parser/bgp/attributes/attr_03_next_hop.rs +++ b/src/parser/bgp/attributes/attr_03_next_hop.rs @@ -1,41 +1,43 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::Bytes; +use std::net::IpAddr; -pub fn parse_next_hop(mut input: Bytes, afi: &Option) -> Result { - if let Some(afi) = afi { - Ok(input.read_address(afi).map(AttributeValue::NextHop)?) - } else { - Ok(input - .read_address(&Afi::Ipv4) - .map(AttributeValue::NextHop)?) +pub fn parse_next_hop(mut input: &[u8], afi: &Option) -> Result { + match afi.unwrap_or(Afi::Ipv4) { + Afi::Ipv4 => { + input.expect_remaining_eq(4, "NEXT_HOP")?; + Ok(input + .read_ipv4_address() + .map(IpAddr::V4) + .map(AttributeValue::NextHop)?) + } + Afi::Ipv6 => { + input.expect_remaining_eq(16, "NEXT_HOP")?; + Ok(input + .read_ipv6_address() + .map(IpAddr::V6) + .map(AttributeValue::NextHop)?) 
+ } } } -pub fn parse_mp_next_hop(mut input: Bytes) -> Result, ParserError> { - let output = match input.len() { - 0 => None, - 4 => Some(input.read_ipv4_address().map(NextHopAddress::Ipv4)?), - 16 => Some(input.read_ipv6_address().map(NextHopAddress::Ipv6)?), - 32 => Some(NextHopAddress::Ipv6LinkLocal( +pub fn parse_mp_next_hop(mut input: &[u8]) -> Result { + match input.len() { + // 0 => Ok(None), + 4 => Ok(input.read_ipv4_address().map(NextHopAddress::Ipv4)?), + 16 => Ok(input.read_ipv6_address().map(NextHopAddress::Ipv6)?), + 32 => Ok(NextHopAddress::Ipv6LinkLocal( input.read_ipv6_address()?, input.read_ipv6_address()?, )), - v => { - return Err(ParserError::ParseError(format!( - "Invalid next hop length found: {}", - v - ))); - } - }; - Ok(output) + v => Err(ParserError::InvalidNextHopLength(v)), + } } #[cfg(test)] mod tests { use super::*; - use bytes::BytesMut; use std::net::{Ipv4Addr, Ipv6Addr}; use std::str::FromStr; @@ -43,8 +45,8 @@ mod tests { fn test_parse_next_hop() { let ipv4 = Ipv4Addr::from_str("10.0.0.1").unwrap(); let ipv6 = Ipv6Addr::from_str("FC00::1").unwrap(); - let ipv4_bytes = Bytes::from(ipv4.octets().to_vec()); - let ipv6_bytes = Bytes::from(ipv6.octets().to_vec()); + let ipv4_bytes = &ipv4.octets(); + let ipv6_bytes = &ipv6.octets(); let res = parse_next_hop(ipv4_bytes, &None).unwrap(); if let AttributeValue::NextHop(n) = res { @@ -63,28 +65,26 @@ mod tests { #[test] fn test_parse_np_next_hop() { - let ipv4 = Bytes::from(Ipv4Addr::from_str("10.0.0.1").unwrap().octets().to_vec()); - let ipv6 = Bytes::from(Ipv6Addr::from_str("fc00::1").unwrap().octets().to_vec()); - let ipv6_2 = Bytes::from(Ipv6Addr::from_str("fc00::2").unwrap().octets().to_vec()); + let ipv4 = Ipv4Addr::from_str("10.0.0.1").unwrap().octets(); + let ipv6 = Ipv6Addr::from_str("fc00::1").unwrap().octets(); + let ipv6_2 = Ipv6Addr::from_str("fc00::2").unwrap().octets(); - if let Some(NextHopAddress::Ipv4(n)) = parse_mp_next_hop(ipv4).unwrap() { + if let 
NextHopAddress::Ipv4(n) = parse_mp_next_hop(&ipv4).unwrap() { assert_eq!(n.to_string(), "10.0.0.1".to_string()) } else { panic!(); } - if let Some(NextHopAddress::Ipv6(n)) = parse_mp_next_hop(ipv6.clone()).unwrap() { + if let NextHopAddress::Ipv6(n) = parse_mp_next_hop(&ipv6).unwrap() { assert_eq!(n.to_string(), "fc00::1".to_string()) } else { panic!(); } - let mut combined = BytesMut::from(ipv6.to_vec().as_slice()); + let mut combined = ipv6.to_vec(); combined.extend_from_slice(&ipv6_2); - if let Some(NextHopAddress::Ipv6LinkLocal(n, m)) = - parse_mp_next_hop(combined.into()).unwrap() - { + if let NextHopAddress::Ipv6LinkLocal(n, m) = parse_mp_next_hop(&combined).unwrap() { assert_eq!(n.to_string(), "fc00::1".to_string()); assert_eq!(m.to_string(), "fc00::2".to_string()); } else { diff --git a/src/parser/bgp/attributes/attr_04_med.rs b/src/parser/bgp/attributes/attr_04_med.rs index 2562031..71c398d 100644 --- a/src/parser/bgp/attributes/attr_04_med.rs +++ b/src/parser/bgp/attributes/attr_04_med.rs @@ -1,9 +1,9 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::Bytes; -pub fn parse_med(mut input: Bytes) -> Result { +pub fn parse_med(mut input: &[u8]) -> Result { + input.expect_remaining_eq(4, "MULTI_EXIT_DISCRIMINATOR")?; Ok(AttributeValue::MultiExitDiscriminator(input.read_u32()?)) } @@ -13,9 +13,7 @@ mod tests { #[test] fn test_parse_med() { - if let Ok(AttributeValue::MultiExitDiscriminator(123)) = - parse_med(Bytes::from(vec![0, 0, 0, 123])) - { + if let Ok(AttributeValue::MultiExitDiscriminator(123)) = parse_med(&[0, 0, 0, 123]) { } else { panic!() } diff --git a/src/parser/bgp/attributes/attr_05_local_pref.rs b/src/parser/bgp/attributes/attr_05_local_pref.rs index 9facc13..7c91802 100644 --- a/src/parser/bgp/attributes/attr_05_local_pref.rs +++ b/src/parser/bgp/attributes/attr_05_local_pref.rs @@ -1,9 +1,9 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::Bytes; -pub fn 
parse_local_pref(mut input: Bytes) -> Result { +pub fn parse_local_pref(mut input: &[u8]) -> Result { + input.expect_remaining_eq(4, "LOCAL_PREFERENCE")?; Ok(AttributeValue::LocalPreference(input.read_u32()?)) } @@ -13,9 +13,7 @@ mod tests { #[test] fn test_parse_med() { - if let Ok(AttributeValue::LocalPreference(123)) = - parse_local_pref(Bytes::from(vec![0, 0, 0, 123])) - { + if let Ok(AttributeValue::LocalPreference(123)) = parse_local_pref(&[0, 0, 0, 123]) { } else { panic!() } diff --git a/src/parser/bgp/attributes/attr_07_18_aggregator.rs b/src/parser/bgp/attributes/attr_07_18_aggregator.rs index 40b39d4..b0efef1 100644 --- a/src/parser/bgp/attributes/attr_07_18_aggregator.rs +++ b/src/parser/bgp/attributes/attr_07_18_aggregator.rs @@ -1,7 +1,6 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; use log::warn; /// Parse aggregator attribute. @@ -16,20 +15,15 @@ use log::warn; /// IP address SHOULD be the same as the BGP Identifier of the speaker.` /// ``` pub fn parse_aggregator( - mut input: Bytes, - asn_len: &AsnLength, + mut input: &[u8], + asn_len: AsnLength, ) -> Result<(Asn, BgpIdentifier), ParserError> { let asn_len_found = match input.remaining() { 8 => AsnLength::Bits32, 6 => AsnLength::Bits16, - _ => { - return Err(ParserError::ParseError(format!( - "Aggregator attribute length is invalid: found {}, should 6 or 8", - input.remaining() - ))) - } + x => return Err(ParserError::InvalidAggregatorAttrLength(x)), }; - if asn_len_found != *asn_len { + if asn_len_found != asn_len { warn!( "Aggregator attribute with ASN length set to {:?} but found {:?}", asn_len, asn_len_found @@ -54,9 +48,8 @@ mod tests { let mut data = vec![]; data.extend([1u8, 2]); data.extend(identifier.octets()); - let bytes = Bytes::from(data); - if let Ok((asn, n)) = parse_aggregator(bytes, &AsnLength::Bits16) { + if let Ok((asn, n)) = parse_aggregator(&data, AsnLength::Bits16) { assert_eq!(n, identifier); assert_eq!(asn, 
Asn::new_16bit(258)) } else { @@ -66,9 +59,8 @@ mod tests { let mut data = vec![]; data.extend([0u8, 0, 1, 2]); data.extend(identifier.octets()); - let bytes = Bytes::from(data); - if let Ok((asn, n)) = parse_aggregator(bytes, &AsnLength::Bits32) { + if let Ok((asn, n)) = parse_aggregator(&data, AsnLength::Bits32) { assert_eq!(n, identifier); assert_eq!(asn, Asn::new_16bit(258)) } else { diff --git a/src/parser/bgp/attributes/attr_08_communities.rs b/src/parser/bgp/attributes/attr_08_communities.rs index eedcd3b..8906056 100644 --- a/src/parser/bgp/attributes/attr_08_communities.rs +++ b/src/parser/bgp/attributes/attr_08_communities.rs @@ -1,16 +1,15 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; -pub fn parse_regular_communities(mut input: Bytes) -> Result { +pub fn parse_regular_communities(mut input: &[u8]) -> Result { const COMMUNITY_NO_EXPORT: u32 = 0xFFFFFF01; const COMMUNITY_NO_ADVERTISE: u32 = 0xFFFFFF02; const COMMUNITY_NO_EXPORT_SUBCONFED: u32 = 0xFFFFFF03; - let mut communities = vec![]; + let mut communities = Communities::with_capacity(input.remaining() / 4); - while input.remaining() > 0 { + while !input.is_empty() { let community_val = input.read_u32()?; communities.push(match community_val { COMMUNITY_NO_EXPORT => Community::NoExport, @@ -34,14 +33,12 @@ mod tests { /// Test parsing of communities values, as defined in RFC1997. 
#[test] fn test_parse_communities() { - if let Ok(AttributeValue::Communities(communities)) = - parse_regular_communities(Bytes::from(vec![ - 0xFF, 0xFF, 0xFF, 0x01, // NoExport - 0xFF, 0xFF, 0xFF, 0x02, // NoAdvertise - 0xFF, 0xFF, 0xFF, 0x03, // NoExportSubConfed - 0x00, 0x7B, 0x01, 0xC8, // Custom(123, 456) - ])) - { + if let Ok(AttributeValue::Communities(communities)) = parse_regular_communities(&[ + 0xFF, 0xFF, 0xFF, 0x01, // NoExport + 0xFF, 0xFF, 0xFF, 0x02, // NoAdvertise + 0xFF, 0xFF, 0xFF, 0x03, // NoExportSubConfed + 0x00, 0x7B, 0x01, 0xC8, // Custom(123, 456) + ]) { assert_eq!(communities.len(), 4); assert_eq!(communities[0], Community::NoExport); assert_eq!(communities[1], Community::NoAdvertise); diff --git a/src/parser/bgp/attributes/attr_09_originator.rs b/src/parser/bgp/attributes/attr_09_originator.rs index b4a22ce..5e43818 100644 --- a/src/parser/bgp/attributes/attr_09_originator.rs +++ b/src/parser/bgp/attributes/attr_09_originator.rs @@ -1,14 +1,9 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; -pub fn parse_originator_id(mut input: Bytes) -> Result { - if input.remaining() != 4 { - return Err(ParserError::ParseError( - "ORIGINATOR_ID attribute must be 4 bytes".to_string(), - )); - } +pub fn parse_originator_id(mut input: &[u8]) -> Result { + input.expect_remaining_eq(4, "ORIGINATOR_ID")?; Ok(AttributeValue::OriginatorId(input.read_ipv4_address()?)) } @@ -21,9 +16,7 @@ mod tests { #[test] fn test_parse_originator_id() { let ipv4 = Ipv4Addr::from_str("10.0.0.1").unwrap(); - if let Ok(AttributeValue::OriginatorId(n)) = - parse_originator_id(Bytes::from(ipv4.octets().to_vec())) - { + if let Ok(AttributeValue::OriginatorId(n)) = parse_originator_id(&ipv4.octets()) { assert_eq!(n, ipv4); } else { panic!() diff --git a/src/parser/bgp/attributes/attr_10_13_cluster.rs b/src/parser/bgp/attributes/attr_10_13_cluster.rs index 12753b6..66e6279 100644 --- 
a/src/parser/bgp/attributes/attr_10_13_cluster.rs +++ b/src/parser/bgp/attributes/attr_10_13_cluster.rs @@ -1,11 +1,10 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; /// -pub fn parse_clusters(mut input: Bytes) -> Result { - let mut clusters = Vec::with_capacity(input.remaining() / 4); +pub fn parse_clusters(mut input: &[u8]) -> Result { + let mut clusters = ClusterList::with_capacity(input.remaining() / 4); while input.remaining() > 0 { clusters.push(input.read_u32()?); } @@ -18,9 +17,9 @@ mod tests { #[test] fn test_parse_clusters() { - if let Ok(AttributeValue::Clusters(n)) = parse_clusters(Bytes::from(vec![ - 0xC0, 0x00, 0x02, 0x01, 0xC0, 0x00, 0x02, 0x02, - ])) { + if let Ok(AttributeValue::Clusters(n)) = + parse_clusters(&[0xC0, 0x00, 0x02, 0x01, 0xC0, 0x00, 0x02, 0x02]) + { assert_eq!(n.len(), 2); assert_eq!(n[0], 0xC0000201); assert_eq!(n[1], 0xC0000202); diff --git a/src/parser/bgp/attributes/attr_14_15_nlri.rs b/src/parser/bgp/attributes/attr_14_15_nlri.rs index d709cce..4e5f1df 100644 --- a/src/parser/bgp/attributes/attr_14_15_nlri.rs +++ b/src/parser/bgp/attributes/attr_14_15_nlri.rs @@ -2,10 +2,9 @@ use crate::models::*; use crate::parser::bgp::attributes::attr_03_next_hop::parse_mp_next_hop; use crate::parser::{parse_nlri_list, ReadUtils}; use crate::ParserError; -use bytes::Bytes; use log::warn; +use smallvec::smallvec; -/// /// /// The attribute is encoded as shown below: /// +---------------------------------------------------------+ @@ -21,90 +20,76 @@ use log::warn; /// +---------------------------------------------------------+ /// | Network Layer Reachability Information (variable) | /// +---------------------------------------------------------+ -pub fn parse_nlri( - mut input: Bytes, - afi: &Option, - safi: &Option, - prefixes: &Option<&[NetworkPrefix]>, - reachable: bool, // whether the NLRI is announcements or withdrawals +pub fn parse_reach_nlri( + mut input: &[u8], + afi: Option, + 
safi: Option, + prefixes: Option<&NetworkPrefix>, additional_paths: bool, // whether the NLRI is part of an additional paths message ) -> Result { let first_byte_zero = input[0] == 0; // read address family let afi = match afi { - Some(afi) => { - if first_byte_zero { - input.read_afi()? - } else { - afi.to_owned() - } - } - None => input.read_afi()?, + Some(afi) if !first_byte_zero => afi, + _ => Afi::try_from(input.read_u16()?)?, }; let safi = match safi { - Some(safi) => { - if first_byte_zero { - input.read_safi()? - } else { - safi.to_owned() - } - } - None => input.read_safi()?, + Some(safi) if !first_byte_zero => safi, + _ => Safi::try_from(input.read_u8()?)?, }; - let mut next_hop = None; - if reachable { - let next_hop_length = input.read_u8()? as usize; - input.has_n_remaining(next_hop_length)?; - let next_hop_bytes = input.split_to(next_hop_length); - next_hop = match parse_mp_next_hop(next_hop_bytes) { - Ok(x) => x, - Err(e) => return Err(e), - }; - } + let next_hop_length = input.read_u8()? as usize; + input.require_n_remaining(next_hop_length, "mp next hop")?; + let next_hop_bytes = input.split_to(next_hop_length)?; + let next_hop = parse_mp_next_hop(next_hop_bytes)?; let prefixes = match prefixes { - Some(pfxs) => { - // skip parsing prefixes: https://datatracker.ietf.org/doc/html/rfc6396#section-4.3.4 - if first_byte_zero { - if reachable { - // skip reserved byte for reachable NRLI - if input.read_u8()? != 0 { - warn!("NRLI reserved byte not 0"); - } - } - parse_nlri_list(input, additional_paths, &afi)? - } else { - pfxs.to_vec() - } - } - None => { - if reachable { - // skip reserved byte for reachable NRLI - if input.read_u8()? != 0 { - warn!("NRLI reserved byte not 0"); - } + // skip parsing prefixes: https://datatracker.ietf.org/doc/html/rfc6396#section-4.3.4 + Some(prefix) if !first_byte_zero => smallvec![*prefix], + _ => { + // skip reserved byte for reachable NRLI + if input.read_u8()? 
!= 0 { + warn!("NRLI reserved byte not 0"); } - parse_nlri_list(input, additional_paths, &afi)? + + parse_nlri_list(input, additional_paths, afi)? } }; - // Reserved field, should ignore - match reachable { - true => Ok(AttributeValue::MpReachNlri(Nlri { - afi, - safi, - next_hop, - prefixes, - })), - false => Ok(AttributeValue::MpUnreachNlri(Nlri { - afi, - safi, - next_hop, - prefixes, - })), - } + Ok(AttributeValue::MpReachNlri(ReachableNlri::new( + afi, safi, next_hop, prefixes, + ))) +} + +pub fn parse_unreach_nlri( + mut input: &[u8], + afi: Option, + safi: Option, + prefixes: Option<&NetworkPrefix>, + additional_paths: bool, // whether the NLRI is part of an additional paths message +) -> Result { + let first_byte_zero = input[0] == 0; + + // read address family + let afi = match afi { + Some(afi) if !first_byte_zero => afi, + _ => input.read_afi()?, + }; + let safi = match safi { + Some(safi) if !first_byte_zero => safi, + _ => input.read_safi()?, + }; + + let prefixes = match prefixes { + // skip parsing prefixes: https://datatracker.ietf.org/doc/html/rfc6396#section-4.3.4 + Some(prefix) if !first_byte_zero => smallvec![*prefix], + _ => parse_nlri_list(input, additional_paths, afi)?, + }; + + Ok(AttributeValue::MpUnreachNlri(UnreachableNlri::new( + afi, safi, prefixes, + ))) } #[cfg(test)] @@ -116,7 +101,7 @@ mod tests { #[test] fn test_parsing_nlri_simple() { - let test_bytes = Bytes::from(vec![ + let test_bytes = &[ 0x00, 0x01, // address family: IPv4 0x01, // safi: unicast 0x04, // next hop length: 4 @@ -125,21 +110,19 @@ mod tests { // NLRI 0x18, // 24 bits prefix length 0xC0, 0x00, 0x02, // 192.0.2 - ]); - let res = parse_nlri(test_bytes, &None, &None, &None, true, false); + ]; + let res = parse_reach_nlri(test_bytes, None, None, None, false); if let Ok(AttributeValue::MpReachNlri(nlri)) = res { - assert_eq!(nlri.afi, Afi::Ipv4); - assert_eq!(nlri.safi, Safi::Unicast); + assert_eq!(nlri.address_family(), Afi::Ipv4); + assert_eq!(nlri.safi(), 
Safi::Unicast); assert_eq!( - nlri.next_hop, - Some(NextHopAddress::Ipv4( - Ipv4Addr::from_str("192.0.2.1").unwrap() - )) + nlri.next_hop(), + NextHopAddress::Ipv4(Ipv4Addr::from_str("192.0.2.1").unwrap()) ); assert_eq!( nlri.prefixes, - vec![NetworkPrefix::from_str("192.0.2.0/24").unwrap()] + PrefixList::from([NetworkPrefix::from_str("192.0.2.0/24").unwrap()]) ); } else { panic!("Unexpected result: {:?}", res); @@ -148,7 +131,7 @@ mod tests { #[test] fn test_parsing_nlri_add_path() { - let test_bytes = Bytes::from(vec![ + let test_bytes = &[ 0x00, 0x01, // address family: IPv4 0x01, // safi: unicast 0x04, // next hop length: 4 @@ -158,17 +141,15 @@ mod tests { 0x00, 0x00, 0x00, 0x7B, // path_id: 123 0x18, // 24 bits prefix length 0xC0, 0x00, 0x02, // 192.0.2 - ]); - let res = parse_nlri(test_bytes, &None, &None, &None, true, true); + ]; + let res = parse_reach_nlri(test_bytes, None, None, None, true); if let Ok(AttributeValue::MpReachNlri(nlri)) = res { - assert_eq!(nlri.afi, Afi::Ipv4); - assert_eq!(nlri.safi, Safi::Unicast); + assert_eq!(nlri.address_family(), Afi::Ipv4); + assert_eq!(nlri.safi(), Safi::Unicast); assert_eq!( - nlri.next_hop, - Some(NextHopAddress::Ipv4( - Ipv4Addr::from_str("192.0.2.1").unwrap() - )) + nlri.next_hop(), + NextHopAddress::Ipv4(Ipv4Addr::from_str("192.0.2.1").unwrap()) ); let prefix = NetworkPrefix::new(IpNet::from_str("192.0.2.0/24").unwrap(), 123); assert_eq!(nlri.prefixes[0], prefix); diff --git a/src/parser/bgp/attributes/attr_16_25_extended_communities.rs b/src/parser/bgp/attributes/attr_16_25_extended_communities.rs index 1589df7..695f0ef 100644 --- a/src/parser/bgp/attributes/attr_16_25_extended_communities.rs +++ b/src/parser/bgp/attributes/attr_16_25_extended_communities.rs @@ -7,13 +7,12 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; use std::net::Ipv4Addr; -pub fn parse_extended_community(mut input: Bytes) -> Result { - let mut communities = Vec::new(); +pub fn 
parse_extended_community(mut input: &[u8]) -> Result { + let mut communities = ExtendedCommunities::with_capacity(input.remaining() / 8); - while input.remaining() > 0 { + while !input.is_empty() { let ec_type_u8 = input.read_u8()?; let ec: ExtendedCommunity = match ExtendedCommunityType::from(ec_type_u8) { ExtendedCommunityType::TransitiveTwoOctetAs => { @@ -115,8 +114,8 @@ pub fn parse_extended_community(mut input: Bytes) -> Result Result { - let mut communities = Vec::new(); +pub fn parse_ipv6_extended_community(mut input: &[u8]) -> Result { + let mut communities = ExtendedCommunities::with_capacity(input.remaining() / 20); while input.remaining() > 0 { let ec_type_u8 = input.read_u8()?; let sub_type = input.read_u8()?; @@ -147,7 +146,7 @@ mod tests { #[test] fn test_parse_extended_communities_two_octet_as() { - let data: Vec = vec![ + let data = [ 0x00, // Transitive Two Octet AS Specific 0x02, // Route Target 0x00, 0x01, // AS 1 @@ -155,7 +154,7 @@ mod tests { ]; if let AttributeValue::ExtendedCommunities(communities) = - parse_extended_community(Bytes::from(data)).unwrap() + parse_extended_community(&data).unwrap() { assert_eq!(communities.len(), 1); if let ExtendedCommunity::TransitiveTwoOctetAs(community) = &communities[0] { @@ -172,7 +171,7 @@ mod tests { #[test] fn test_parse_extended_communities_ipv4() { - let data: Vec = vec![ + let data = [ 0x01, // Transitive IPv4 Address Specific 0x02, // Route Target 0xC0, 0x00, 0x02, 0x01, // ipv4: 192.0.2.1 @@ -180,7 +179,7 @@ mod tests { ]; if let AttributeValue::ExtendedCommunities(communities) = - parse_extended_community(Bytes::from(data)).unwrap() + parse_extended_community(&data).unwrap() { assert_eq!(communities.len(), 1); if let ExtendedCommunity::TransitiveIpv4Addr(community) = &communities[0] { @@ -197,7 +196,7 @@ mod tests { #[test] fn test_parse_extended_communities_four_octet_as() { - let data: Vec = vec![ + let data = [ 0x02, // Transitive Four Octet AS Specific 0x02, // Route Target 0x00, 0x00, 
0x00, 0x01, // AS 1 @@ -205,7 +204,7 @@ mod tests { ]; if let AttributeValue::ExtendedCommunities(communities) = - parse_extended_community(Bytes::from(data)).unwrap() + parse_extended_community(&data).unwrap() { assert_eq!(communities.len(), 1); if let ExtendedCommunity::TransitiveFourOctetAs(community) = &communities[0] { @@ -222,14 +221,14 @@ mod tests { #[test] fn test_parse_extended_communities_opaque() { - let data: Vec = vec![ + let data = [ 0x03, // Transitive Opaque 0x02, // Route Target 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, // Opaque ]; if let AttributeValue::ExtendedCommunities(communities) = - parse_extended_community(Bytes::from(data)).unwrap() + parse_extended_community(&data).unwrap() { assert_eq!(communities.len(), 1); if let ExtendedCommunity::TransitiveOpaque(community) = &communities[0] { @@ -245,7 +244,7 @@ mod tests { #[test] fn test_parse_extended_communities_ipv6() { - let data: Vec = vec![ + let data = [ 0x40, // Transitive IPv6 Address Specific 0x02, // Route Target 0x20, 0x01, 0x0D, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -254,7 +253,7 @@ mod tests { ]; if let AttributeValue::ExtendedCommunities(communities) = - parse_ipv6_extended_community(Bytes::from(data)).unwrap() + parse_ipv6_extended_community(&data).unwrap() { assert_eq!(communities.len(), 1); if let ExtendedCommunity::Ipv6Addr(community) = &communities[0] { diff --git a/src/parser/bgp/attributes/attr_32_large_communities.rs b/src/parser/bgp/attributes/attr_32_large_communities.rs index 68cbfed..fd674ee 100644 --- a/src/parser/bgp/attributes/attr_32_large_communities.rs +++ b/src/parser/bgp/attributes/attr_32_large_communities.rs @@ -1,14 +1,13 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::{Buf, Bytes}; -pub fn parse_large_communities(mut input: Bytes) -> Result { - let mut communities = Vec::new(); +pub fn parse_large_communities(mut input: &[u8]) -> Result { + let mut communities = 
LargeCommunities::with_capacity(input.remaining() / 12); while input.remaining() > 0 { - input.has_n_remaining(12)?; // 12 bytes for large community (3x 32 bits integers) - let global_administrator = input.get_u32(); - let local_data = [input.get_u32(), input.get_u32()]; + input.require_n_remaining(12, "large community")?; // 12 bytes for large community (3x 32 bits integers) + let global_administrator = input.read_u32()?; + let local_data = [input.read_u32()?, input.read_u32()?]; communities.push(LargeCommunity::new(global_administrator, local_data)); } Ok(AttributeValue::LargeCommunities(communities)) @@ -29,9 +28,7 @@ mod tests { 0x00, 0x00, 0x00, 0x06, // local data ]; - if let Ok(AttributeValue::LargeCommunities(communities)) = - parse_large_communities(Bytes::from(data)) - { + if let Ok(AttributeValue::LargeCommunities(communities)) = parse_large_communities(&data) { assert_eq!(communities.len(), 2); assert_eq!(communities[0].global_admin, 1); assert_eq!(communities[0].local_data[0], 2); diff --git a/src/parser/bgp/attributes/attr_35_otc.rs b/src/parser/bgp/attributes/attr_35_otc.rs index 2dfc89c..939025f 100644 --- a/src/parser/bgp/attributes/attr_35_otc.rs +++ b/src/parser/bgp/attributes/attr_35_otc.rs @@ -1,7 +1,6 @@ use crate::models::*; use crate::parser::ReadUtils; use crate::ParserError; -use bytes::Bytes; /// parse RFC9234 OnlyToCustomer attribute. /// @@ -19,7 +18,8 @@ use bytes::Bytes; /// 1. If a route is to be advertised to a Customer, a Peer, or an RS-Client (when the sender is an RS), and the OTC Attribute is not present, then when advertising the route, an OTC Attribute MUST be added with a value equal to the AS number of the local AS. /// 2. If a route already contains the OTC Attribute, it MUST NOT be propagated to Providers, Peers, or RSes. 
/// ``` -pub fn parse_only_to_customer(mut input: Bytes) -> Result { +pub fn parse_only_to_customer(mut input: &[u8]) -> Result { + input.expect_remaining_eq(4, "ONLY_TO_CUSTOMER")?; let remote_asn = input.read_u32()?; Ok(AttributeValue::OnlyToCustomer(Asn::new_32bit(remote_asn))) } @@ -30,9 +30,7 @@ mod tests { #[test] fn test_parse_otc() { - if let Ok(AttributeValue::OnlyToCustomer(asn)) = - parse_only_to_customer(Bytes::from(vec![0, 0, 0, 123])) - { + if let Ok(AttributeValue::OnlyToCustomer(asn)) = parse_only_to_customer(&[0, 0, 0, 123]) { assert_eq!(asn, 123); } else { panic!("parsing error") diff --git a/src/parser/bgp/attributes/mod.rs b/src/parser/bgp/attributes/mod.rs index e4e35d6..b459326 100644 --- a/src/parser/bgp/attributes/mod.rs +++ b/src/parser/bgp/attributes/mod.rs @@ -12,8 +12,9 @@ mod attr_16_25_extended_communities; mod attr_32_large_communities; mod attr_35_otc; -use bytes::{Buf, Bytes}; -use log::{debug, warn}; +use crate::bgp::attributes::attr_14_15_nlri::{parse_reach_nlri, parse_unreach_nlri}; +use log::{debug, trace, warn}; +use smallvec::smallvec; use crate::models::*; @@ -27,7 +28,6 @@ use crate::parser::bgp::attributes::attr_07_18_aggregator::parse_aggregator; use crate::parser::bgp::attributes::attr_08_communities::parse_regular_communities; use crate::parser::bgp::attributes::attr_09_originator::parse_originator_id; use crate::parser::bgp::attributes::attr_10_13_cluster::parse_clusters; -use crate::parser::bgp::attributes::attr_14_15_nlri::parse_nlri; use crate::parser::bgp::attributes::attr_16_25_extended_communities::{ parse_extended_community, parse_ipv6_extended_community, }; @@ -52,83 +52,31 @@ impl AttributeParser { /// the slice is the total byte length of the attributes section of the message. 
pub fn parse_attributes( &self, - mut data: Bytes, - asn_len: &AsnLength, + mut data: &[u8], + asn_len: AsnLength, afi: Option, safi: Option, - prefixes: Option<&[NetworkPrefix]>, + prefixes: Option<&NetworkPrefix>, ) -> Result { let mut attributes: Vec = Vec::with_capacity(20); - while data.remaining() >= 3 { - // each attribute is at least 3 bytes: flag(1) + type(1) + length(1) - // thus the while loop condition is set to be at least 3 bytes to read. - - // has content to read - let flag = AttrFlags::from_bits_retain(data.get_u8()); - let attr_type = data.get_u8(); + while !data.is_empty() { + let flag = AttrFlags::from_bits_retain(data.read_u8()?); + let attr_type = AttrType::from(data.read_u8()?); let attr_length = match flag.contains(AttrFlags::EXTENDED) { - false => data.get_u8() as usize, - true => data.get_u16() as usize, + false => data.read_u8()? as usize, + true => data.read_u16()? as usize, }; - let mut partial = false; - if flag.contains(AttrFlags::PARTIAL) { - /* - https://datatracker.ietf.org/doc/html/rfc4271#section-4.3 - - > The third high-order bit (bit 2) of the Attribute Flags octet - > is the Partial bit. It defines whether the information - > contained in the optional transitive attribute is partial (if - > set to 1) or complete (if set to 0). For well-known attributes - > and for optional non-transitive attributes, the Partial bit - > MUST be set to 0. 
- */ - partial = true; - } - - debug!( + trace!( "reading attribute: type -- {:?}, length -- {}", - &attr_type, attr_length + &attr_type, + attr_length ); - let attr_type = match AttrType::from(attr_type) { - attr_type @ AttrType::Unknown(unknown_type) => { - // skip pass the remaining bytes of this attribute - let bytes = data.read_n_bytes(attr_length)?; - let attr_value = match get_deprecated_attr_type(unknown_type) { - Some(t) => { - debug!("deprecated attribute type: {} - {}", unknown_type, t); - AttributeValue::Deprecated(AttrRaw { attr_type, bytes }) - } - None => { - debug!("unknown attribute type: {}", unknown_type); - AttributeValue::Unknown(AttrRaw { attr_type, bytes }) - } - }; - - assert_eq!(attr_type, attr_value.attr_type()); - attributes.push(Attribute { - value: attr_value, - flag, - }); - continue; - } - t => t, - }; - - let bytes_left = data.remaining(); - - if data.remaining() < attr_length { - warn!( - "not enough bytes: input bytes left - {}, want to read - {}; skipping", - bytes_left, attr_length - ); - // break and return already parsed attributes - break; - } // we know data has enough bytes to read, so we can split the bytes into a new Bytes object - let mut attr_data = data.split_to(attr_length); + data.require_n_remaining(attr_length, "Attribute")?; + let mut attr_data = data.split_to(attr_length)?; let attr = match attr_type { AttrType::ORIGIN => parse_origin(attr_data), @@ -151,26 +99,16 @@ impl AttributeParser { }), AttrType::ORIGINATOR_ID => parse_originator_id(attr_data), AttrType::CLUSTER_LIST => parse_clusters(attr_data), - AttrType::MP_REACHABLE_NLRI => parse_nlri( - attr_data, - &afi, - &safi, - &prefixes, - true, - self.additional_paths, - ), - AttrType::MP_UNREACHABLE_NLRI => parse_nlri( - attr_data, - &afi, - &safi, - &prefixes, - false, - self.additional_paths, - ), - AttrType::AS4_PATH => parse_as_path(attr_data, &AsnLength::Bits32) + AttrType::MP_REACHABLE_NLRI => { + parse_reach_nlri(attr_data, afi, safi, prefixes, 
self.additional_paths) + } + AttrType::MP_UNREACHABLE_NLRI => { + parse_unreach_nlri(attr_data, afi, safi, prefixes, self.additional_paths) + } + AttrType::AS4_PATH => parse_as_path(attr_data, AsnLength::Bits32) .map(|path| AttributeValue::AsPath { path, is_as4: true }), AttrType::AS4_AGGREGATOR => { - parse_aggregator(attr_data, &AsnLength::Bits32).map(|(asn, id)| { + parse_aggregator(attr_data, AsnLength::Bits32).map(|(asn, id)| { AttributeValue::Aggregator { asn, id, @@ -187,17 +125,27 @@ impl AttributeParser { parse_ipv6_extended_community(attr_data) } AttrType::DEVELOPMENT => { - let mut value = vec![]; - for _i in 0..attr_length { - value.push(attr_data.get_u8()); - } - Ok(AttributeValue::Development(value)) + let mut buffer = smallvec![0; attr_length]; + attr_data.read_exact(&mut buffer)?; + Ok(AttributeValue::Development(buffer)) } AttrType::ONLY_TO_CUSTOMER => parse_only_to_customer(attr_data), - _ => Err(ParserError::Unsupported(format!( - "unsupported attribute type: {:?}", - attr_type - ))), + AttrType::Unknown(unknown_type) => { + // skip pass the remaining bytes of this attribute + let bytes = data.read_n_bytes(attr_length)?; + match get_deprecated_attr_type(unknown_type) { + Some(t) => { + debug!("deprecated attribute type: {} - {}", unknown_type, t); + Ok(AttributeValue::Deprecated(AttrRaw { attr_type, bytes })) + } + None => { + debug!("unknown attribute type: {}", unknown_type); + Ok(AttributeValue::Unknown(AttrRaw { attr_type, bytes })) + } + } + } + // TODO: Should it be treated as a raw attribute instead? + _ => Err(ParserError::UnsupportedAttributeType(attr_type)), }; match attr { @@ -205,14 +153,24 @@ impl AttributeParser { assert_eq!(attr_type, value.attr_type()); attributes.push(Attribute { value, flag }); } + Err(e) if flag.contains(AttrFlags::PARTIAL) => { + /* + https://datatracker.ietf.org/doc/html/rfc4271#section-4.3 + + > The third high-order bit (bit 2) of the Attribute Flags octet + > is the Partial bit. 
It defines whether the information + > contained in the optional transitive attribute is partial (if + > set to 1) or complete (if set to 0). For well-known attributes + > and for optional non-transitive attributes, the Partial bit + > MUST be set to 0. + */ + + // it's ok to have errors when reading partial bytes + warn!("PARTIAL: {}", e); + } Err(e) => { - if partial { - // it's ok to have errors when reading partial bytes - warn!("PARTIAL: {}", e.to_string()); - } else { - warn!("{}", e.to_string()); - } - continue; + warn!("{}", e); + return Err(e); } }; } diff --git a/src/parser/bgp/messages.rs b/src/parser/bgp/messages.rs index 3ff46f6..c7cca5b 100644 --- a/src/parser/bgp/messages.rs +++ b/src/parser/bgp/messages.rs @@ -1,5 +1,4 @@ use crate::models::*; -use bytes::{Buf, Bytes}; use std::convert::TryFrom; use crate::error::ParserError; @@ -27,13 +26,13 @@ use log::warn; /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ /// ``` pub fn parse_bgp_message( - data: &mut Bytes, + data: &mut &[u8], add_path: bool, - asn_len: &AsnLength, + asn_len: AsnLength, ) -> Result { let total_size = data.len(); - data.has_n_remaining(19)?; - data.advance(16); + data.require_n_remaining(19, "BGP message marker")?; + data.advance(16)?; /* This 2-octet unsigned integer indicates the total length of the message, including the header in octets. Thus, it allows one @@ -45,37 +44,29 @@ pub fn parse_bgp_message( have the smallest value required, given the rest of the message. */ - let length = data.get_u16(); + let length = data.read_u16()?; if !(19..=4096).contains(&length) { - return Err(ParserError::ParseError(format!( - "invalid BGP message length {}", - length - ))); + return Err(ParserError::InvalidBgpMessageLength(length)); } + // TODO: Why do we sometimes change our length estimate? 
let bgp_msg_length = if (length as usize) > total_size { total_size - 19 } else { length as usize - 19 }; - let msg_type: BgpMessageType = match BgpMessageType::try_from(data.get_u8()) { - Ok(t) => t, - Err(_) => { - return Err(ParserError::ParseError( - "Unknown BGP Message Type".to_string(), - )) - } - }; + let msg_type: BgpMessageType = BgpMessageType::try_from(data.read_u8()?)?; if data.remaining() != bgp_msg_length { + // TODO: Why is this not a hard error? warn!( "BGP message length {} does not match the actual length {}", bgp_msg_length, data.remaining() ); } - let mut msg_data = data.split_to(bgp_msg_length); + let mut msg_data = data.split_to(bgp_msg_length)?; Ok(match msg_type { BgpMessageType::OPEN => BgpMessage::Open(parse_bgp_open_message(&mut msg_data)?), @@ -96,7 +87,7 @@ pub fn parse_bgp_message( /// messages, but not critical errors. /// pub fn parse_bgp_notification_message( - mut input: Bytes, + mut input: &[u8], ) -> Result { let error_code = input.read_u8()?; let error_subcode = input.read_u8()?; @@ -110,17 +101,18 @@ pub fn parse_bgp_notification_message( /// Parse BGP OPEN messages. /// /// The parsing of BGP OPEN messages also includes decoding the BGP capabilities. 
-pub fn parse_bgp_open_message(input: &mut Bytes) -> Result { - input.has_n_remaining(10)?; - let version = input.get_u8(); - let asn = Asn::new_16bit(input.get_u16()); - let hold_time = input.get_u16(); +pub fn parse_bgp_open_message(input: &mut &[u8]) -> Result { + input.require_n_remaining(10, "BGP open message header")?; + let version = input.read_u8()?; + let asn = Asn::new_16bit(input.read_u16()?); + let hold_time = input.read_u16()?; let sender_ip = input.read_ipv4_address()?; - let opt_params_len = input.get_u8(); + let opt_params_len = input.read_u8()?; // let pos_end = input.position() + opt_params_len as u64; if input.remaining() != opt_params_len as usize { + // TODO: This seems like it should become a hard error warn!( "BGP open message length {} does not match the actual length {}", opt_params_len, @@ -133,7 +125,7 @@ pub fn parse_bgp_open_message(input: &mut Bytes) -> Result = vec![]; while input.remaining() >= 2 { - let param_type = input.get_u8(); + let param_type = input.read_u8()?; if first { // first parameter, check if it is extended length message if opt_params_len == 255 && param_type == 255 { @@ -146,7 +138,7 @@ pub fn parse_bgp_open_message(input: &mut Bytes) -> Result Result Result, ParserError> { +fn read_nlri(mut input: &[u8], afi: &Afi, add_path: bool) -> Result { let length = input.len(); if length == 0 { - return Ok(vec![]); + return Ok(PrefixList::new()); } if length == 1 { + // TODO: Should this become a hard error? // 1 byte does not make sense warn!("seeing strange one-byte NLRI field"); - input.advance(1); // skip the byte - return Ok(vec![]); + input.advance(1)?; // skip the byte + return Ok(PrefixList::new()); } - parse_nlri_list(input, add_path, afi) + parse_nlri_list(input, add_path, *afi) } /// read bgp update message. 
/// /// RFC: pub fn parse_bgp_update_message( - mut input: Bytes, + mut input: &[u8], add_path: bool, - asn_len: &AsnLength, + asn_len: AsnLength, ) -> Result { // AFI for routes out side attributes are IPv4 ONLY. let afi = Afi::Ipv4; // parse withdrawn prefixes nlri let withdrawn_bytes_length = input.read_u16()? as usize; - let withdrawn_bytes = input.split_to(withdrawn_bytes_length); + let withdrawn_bytes = input.split_to(withdrawn_bytes_length)?; let withdrawn_prefixes = read_nlri(withdrawn_bytes, &afi, add_path)?; // parse attributes let attribute_length = input.read_u16()? as usize; let attr_parser = AttributeParser::new(add_path); - input.has_n_remaining(attribute_length)?; - let attr_data_slice = input.split_to(attribute_length); + input.require_n_remaining(attribute_length, "update attributes")?; + let attr_data_slice = input.split_to(attribute_length)?; let attributes = attr_parser.parse_attributes(attr_data_slice, asn_len, None, None, None)?; // parse announced prefixes nlri. diff --git a/src/parser/bmp/error.rs b/src/parser/bmp/error.rs index 8f2695b..3193b58 100644 --- a/src/parser/bmp/error.rs +++ b/src/parser/bmp/error.rs @@ -1,81 +1,28 @@ -use crate::bmp::messages::headers::PeerType; -use crate::bmp::messages::initiation_message::InitiationTlvType; -use crate::bmp::messages::route_mirroring::RouteMirroringInfo; -use crate::bmp::messages::termination_message::TerminationTlvType; -use crate::bmp::messages::BmpMsgType; use crate::ParserError; -use num_enum::TryFromPrimitiveError; -use std::error::Error; -use std::fmt::{Display, Formatter}; +use num_enum::{TryFromPrimitive, TryFromPrimitiveError}; +use std::io; +use thiserror::Error; -#[derive(Debug)] +#[derive(Debug, Error)] pub enum ParserBmpError { + #[error(transparent)] + IoError(#[from] io::Error), + #[error(transparent)] + ParseError(#[from] ParserError), + #[error("invalid OpenBMP header")] InvalidOpenBmpHeader, + #[error("invalid stats data length {0}")] + InvalidStatsDataLength(u16), + 
#[error("unsupported OpenBMP message")] UnsupportedOpenBmpMessage, - CorruptedBmpMessage, - TruncatedBmpMessage, } -impl Display for ParserBmpError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - ParserBmpError::InvalidOpenBmpHeader => { - write!(f, "Invalid OpenBMP header") - } - ParserBmpError::UnsupportedOpenBmpMessage => { - write!(f, "Unsupported OpenBMP message") - } - ParserBmpError::CorruptedBmpMessage => { - write!(f, "Corrupted BMP message") - } - ParserBmpError::TruncatedBmpMessage => { - write!(f, "Truncated BMP message") - } - } - } -} - -impl Error for ParserBmpError {} - -// TODO: These conversions make the error difficult to debug as they drop all of the error context -impl From for ParserBmpError { - fn from(_: std::io::Error) -> Self { - ParserBmpError::InvalidOpenBmpHeader - } -} - -impl From for ParserBmpError { - fn from(_: ParserError) -> Self { - ParserBmpError::CorruptedBmpMessage - } -} - -impl From> for ParserBmpError { - fn from(_: TryFromPrimitiveError) -> Self { - ParserBmpError::InvalidOpenBmpHeader - } -} - -impl From> for ParserBmpError { - fn from(_: TryFromPrimitiveError) -> Self { - ParserBmpError::InvalidOpenBmpHeader - } -} - -impl From> for ParserBmpError { - fn from(_: TryFromPrimitiveError) -> Self { - ParserBmpError::CorruptedBmpMessage - } -} - -impl From> for ParserBmpError { - fn from(_: TryFromPrimitiveError) -> Self { - ParserBmpError::CorruptedBmpMessage - } -} - -impl From> for ParserBmpError { - fn from(_: TryFromPrimitiveError) -> Self { - ParserBmpError::CorruptedBmpMessage +impl From> for ParserBmpError +where + T: TryFromPrimitive, + ParserError: From>, +{ + fn from(value: TryFromPrimitiveError) -> Self { + ParserBmpError::ParseError(ParserError::from(value)) } } diff --git a/src/parser/bmp/messages/headers.rs b/src/parser/bmp/messages/headers.rs index 327d83e..44e5900 100644 --- a/src/parser/bmp/messages/headers.rs +++ b/src/parser/bmp/messages/headers.rs @@ -2,7 +2,6 @@ use 
crate::models::*; use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; use bitflags::bitflags; -use bytes::{Buf, Bytes}; use num_enum::{IntoPrimitive, TryFromPrimitive}; use std::convert::TryFrom; use std::net::IpAddr; @@ -54,11 +53,11 @@ pub struct BmpCommonHeader { pub msg_type: BmpMsgType, } -pub fn parse_bmp_common_header(data: &mut Bytes) -> Result { +pub fn parse_bmp_common_header(data: &mut &[u8]) -> Result { let version = data.read_u8()?; if version != 3 { // has to be 3 per rfc7854 - return Err(ParserBmpError::CorruptedBmpMessage); + return Err(ParserBmpError::InvalidOpenBmpHeader); } let msg_len = data.read_u32()?; @@ -148,14 +147,14 @@ impl PeerFlags { } } -pub fn parse_per_peer_header(data: &mut Bytes) -> Result { +pub fn parse_per_peer_header(data: &mut &[u8]) -> Result { let peer_type = PeerType::try_from(data.read_u8()?)?; let peer_flags = PeerFlags::from_bits_retain(data.read_u8()?); let peer_distinguisher = data.read_u64()?; let peer_ip = match peer_flags.address_family() { Afi::Ipv4 => { - data.advance(12); + data.advance(12)?; IpAddr::V4(data.read_ipv4_address()?) } Afi::Ipv6 => IpAddr::V6(data.read_ipv6_address()?), @@ -163,7 +162,7 @@ pub fn parse_per_peer_header(data: &mut Bytes) -> Result { - data.advance(2); + data.advance(2)?; Asn::new_16bit(data.read_u16()?) 
} AsnLength::Bits32 => Asn::new_32bit(data.read_u32()?), diff --git a/src/parser/bmp/messages/initiation_message.rs b/src/parser/bmp/messages/initiation_message.rs index 1bbe137..8f56b71 100644 --- a/src/parser/bmp/messages/initiation_message.rs +++ b/src/parser/bmp/messages/initiation_message.rs @@ -1,6 +1,5 @@ use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; use num_enum::{IntoPrimitive, TryFromPrimitive}; use std::convert::TryFrom; @@ -27,12 +26,12 @@ pub enum InitiationTlvType { SysName = 2, } -pub fn parse_initiation_message(data: &mut Bytes) -> Result { +pub fn parse_initiation_message(data: &mut &[u8]) -> Result { let mut tlvs = vec![]; while data.remaining() > 4 { - let info_type: InitiationTlvType = InitiationTlvType::try_from(data.get_u16())?; - let info_len = data.get_u16(); + let info_type: InitiationTlvType = InitiationTlvType::try_from(data.read_u16()?)?; + let info_len = data.read_u16()?; if data.remaining() < info_len as usize { // not enough bytes to read break; diff --git a/src/parser/bmp/messages/peer_down_notification.rs b/src/parser/bmp/messages/peer_down_notification.rs index 6053f51..4918f15 100644 --- a/src/parser/bmp/messages/peer_down_notification.rs +++ b/src/parser/bmp/messages/peer_down_notification.rs @@ -1,66 +1,50 @@ use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; +use num_enum::{IntoPrimitive, TryFromPrimitive}; + +#[derive(Debug, Hash, Eq, PartialEq, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] +pub enum PeerDownReason { + /// The local system closed the session. Following the reason is a BGP PDU containing a BGP + /// NOTIFICATION message that would have been sent to the peer. + LocalSystemClosedSession = 1, + /// The local system closed the session. No notification message was sent. 
Following the + /// reason code is a 2-byte field containing the code corresponding to the Finite State Machine + /// (FSM) Event that caused the system to close the session (see Section 8.1 of [RFC4271]). Two + /// bytes both set to 0 are used to indicate that no relevant Event code is defined. + LocalSystemClosedSessionWithoutNotification = 2, + /// The remote system closed the session with a notification message. Following the Reason is a + /// BGP PDU containing the BGP NOTIFICATION message as received from the peer. + RemoteSystemClosedSession = 3, + /// The remote system closed the session without a notification message. This includes any + /// unexpected termination of the transport session, so in some cases both the local and remote + /// systems might consider this to apply. + RemoteSystemClosedSessionWithoutNotification = 4, + /// Information for this peer will no longer be sent to the monitoring station for configuration + /// reasons. This does not, strictly speaking, indicate that the peer has gone down, but it + /// does indicate that the monitoring station will not receive updates for the peer. + DisabledDueToConfig = 5, +} #[derive(Debug)] pub struct PeerDownNotification { - pub reason: u8, + pub reason: PeerDownReason, pub data: Option>, } pub fn parse_peer_down_notification( - data: &mut Bytes, + data: &mut &[u8], ) -> Result { - let reason = data.read_u8()?; + let reason = PeerDownReason::try_from(data.read_u8()?)?; let bytes_left = data.remaining(); - let data: Option> = match reason { - 1 => { - /* - The local system closed the session. Following the - Reason is a BGP PDU containing a BGP NOTIFICATION message that - would have been sent to the peer. - */ - Some(data.read_n_bytes(bytes_left)?) - } - 2 => { - /* - The local system closed the session. No notification - message was sent. 
Following the reason code is a 2-byte field - containing the code corresponding to the Finite State Machine - (FSM) Event that caused the system to close the session (see - Section 8.1 of [RFC4271]). Two bytes both set to 0 are used to - indicate that no relevant Event code is defined. - */ + let data = match reason { + PeerDownReason::LocalSystemClosedSession => Some(data.read_n_bytes(bytes_left)?), + PeerDownReason::LocalSystemClosedSessionWithoutNotification => { Some(data.read_n_bytes(bytes_left)?) } - 3 => { - /* - The remote system closed the session with a notification - message. Following the Reason is a BGP PDU containing the BGP - NOTIFICATION message as received from the peer. - */ - Some(data.read_n_bytes(bytes_left)?) - } - 4 => { - /* - The remote system closed the session without a - notification message. This includes any unexpected termination of - the transport session, so in some cases both the local and remote - systems might consider this to apply. - */ - None - } - 5 => { - /* - Information for this peer will no longer be sent to the - monitoring station for configuration reasons. This does not, - strictly speaking, indicate that the peer has gone down, but it - does indicate that the monitoring station will not receive updates - for the peer. 
- */ - None - } - _ => return Err(ParserBmpError::CorruptedBmpMessage), + PeerDownReason::RemoteSystemClosedSession => Some(data.read_n_bytes(bytes_left)?), + PeerDownReason::RemoteSystemClosedSessionWithoutNotification => None, + PeerDownReason::DisabledDueToConfig => None, }; Ok(PeerDownNotification { reason, data }) } diff --git a/src/parser/bmp/messages/peer_up_notification.rs b/src/parser/bmp/messages/peer_up_notification.rs index 4016c81..17e7bbb 100644 --- a/src/parser/bmp/messages/peer_up_notification.rs +++ b/src/parser/bmp/messages/peer_up_notification.rs @@ -2,7 +2,6 @@ use crate::models::*; use crate::parser::bgp::messages::parse_bgp_open_message; use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; use std::net::IpAddr; #[derive(Debug)] @@ -23,12 +22,12 @@ pub struct PeerUpNotificationTlv { } pub fn parse_peer_up_notification( - data: &mut Bytes, + data: &mut &[u8], afi: &Afi, ) -> Result { let local_addr: IpAddr = match afi { Afi::Ipv4 => { - data.advance(12); + data.advance(12)?; let ip = data.read_ipv4_address()?; ip.into() } diff --git a/src/parser/bmp/messages/route_mirroring.rs b/src/parser/bmp/messages/route_mirroring.rs index 555e046..84503d0 100644 --- a/src/parser/bmp/messages/route_mirroring.rs +++ b/src/parser/bmp/messages/route_mirroring.rs @@ -2,7 +2,6 @@ use crate::models::*; use crate::parser::bgp::messages::parse_bgp_update_message; use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; use num_enum::{IntoPrimitive, TryFromPrimitive}; use std::convert::TryFrom; @@ -23,7 +22,23 @@ pub enum RouteMirroringValue { Information(RouteMirroringInfo), } -#[derive(Debug, TryFromPrimitive, IntoPrimitive)] +impl RouteMirroringValue { + pub const fn mirroring_type(&self) -> RouteMirroringTlvType { + match self { + RouteMirroringValue::BgpMessage(_) => RouteMirroringTlvType::BgpMessage, + RouteMirroringValue::Information(_) => 
RouteMirroringTlvType::Information, + } + } +} + +#[derive(Debug, TryFromPrimitive, IntoPrimitive, Hash, Eq, PartialEq)] +#[repr(u16)] +pub enum RouteMirroringTlvType { + BgpMessage = 0, + Information = 1, +} + +#[derive(Debug, TryFromPrimitive, IntoPrimitive, Hash, Eq, PartialEq)] #[repr(u16)] pub enum RouteMirroringInfo { ErroredPdu = 0, @@ -31,22 +46,22 @@ pub enum RouteMirroringInfo { } pub fn parse_route_mirroring( - data: &mut Bytes, - asn_len: &AsnLength, + data: &mut &[u8], + asn_len: AsnLength, ) -> Result { let mut tlvs = vec![]; - while data.remaining() > 4 { - match data.read_u16()? { - 0 => { + while !data.is_empty() { + match RouteMirroringTlvType::try_from(data.read_u16()?)? { + RouteMirroringTlvType::BgpMessage => { let info_len = data.read_u16()?; - let bytes = data.split_to(info_len as usize); + let bytes = data.split_to(info_len as usize)?; let value = parse_bgp_update_message(bytes, false, asn_len)?; tlvs.push(RouteMirroringTlv { info_len, value: RouteMirroringValue::BgpMessage(value), }); } - 1 => { + RouteMirroringTlvType::Information => { let info_len = data.read_u16()?; let value = RouteMirroringInfo::try_from(data.read_u16()?)?; tlvs.push(RouteMirroringTlv { @@ -54,7 +69,6 @@ pub fn parse_route_mirroring( value: RouteMirroringValue::Information(value), }); } - _ => return Err(ParserBmpError::CorruptedBmpMessage), } } Ok(RouteMirroring { tlvs }) diff --git a/src/parser/bmp/messages/route_monitoring.rs b/src/parser/bmp/messages/route_monitoring.rs index 429065a..abe86d7 100644 --- a/src/parser/bmp/messages/route_monitoring.rs +++ b/src/parser/bmp/messages/route_monitoring.rs @@ -1,7 +1,6 @@ use crate::models::*; use crate::parser::bgp::messages::parse_bgp_message; use crate::parser::bmp::error::ParserBmpError; -use bytes::Bytes; #[derive(Debug)] pub struct RouteMonitoring { @@ -9,8 +8,8 @@ pub struct RouteMonitoring { } pub fn parse_route_monitoring( - data: &mut Bytes, - asn_len: &AsnLength, + data: &mut &[u8], + asn_len: AsnLength, ) -> 
Result { // let bgp_update = parse_bgp_update_message(reader, false, afi, asn_len, total_len)?; let bgp_update = parse_bgp_message(data, false, asn_len)?; diff --git a/src/parser/bmp/messages/stats_report.rs b/src/parser/bmp/messages/stats_report.rs index 914ae28..b380004 100644 --- a/src/parser/bmp/messages/stats_report.rs +++ b/src/parser/bmp/messages/stats_report.rs @@ -1,6 +1,5 @@ use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::Bytes; #[derive(Debug)] pub struct StatsReport { @@ -21,7 +20,7 @@ pub enum StatsData { Gauge(u64), } -pub fn parse_stats_report(data: &mut Bytes) -> Result { +pub fn parse_stats_report(data: &mut &[u8]) -> Result { let stats_count = data.read_u32()?; let mut counters = vec![]; for _ in 0..stats_count { @@ -30,7 +29,7 @@ pub fn parse_stats_report(data: &mut Bytes) -> Result StatsData::Counter(data.read_u32()?), 8 => StatsData::Gauge(data.read_u64()?), - _ => return Err(ParserBmpError::CorruptedBmpMessage), + x => return Err(ParserBmpError::InvalidStatsDataLength(x)), }; counters.push(StatCounter { stat_type, diff --git a/src/parser/bmp/messages/termination_message.rs b/src/parser/bmp/messages/termination_message.rs index ace725b..6a79d3f 100644 --- a/src/parser/bmp/messages/termination_message.rs +++ b/src/parser/bmp/messages/termination_message.rs @@ -1,6 +1,5 @@ use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; use num_enum::{IntoPrimitive, TryFromPrimitive}; use std::convert::TryFrom; @@ -26,7 +25,7 @@ pub enum TerminationTlvType { Reason = 1, } -pub fn parse_termination_message(data: &mut Bytes) -> Result { +pub fn parse_termination_message(data: &mut &[u8]) -> Result { let mut tlvs = vec![]; while data.remaining() > 4 { diff --git a/src/parser/bmp/mod.rs b/src/parser/bmp/mod.rs index b7d8158..a86d655 100644 --- a/src/parser/bmp/mod.rs +++ b/src/parser/bmp/mod.rs @@ -4,7 +4,6 @@ Provides parsing for BMP and OpenBMP binary-formatted messages. 
use crate::parser::bmp::error::ParserBmpError; use crate::parser::bmp::messages::*; pub use crate::parser::bmp::openbmp::parse_openbmp_header; -use bytes::Bytes; pub mod error; pub mod messages; @@ -15,13 +14,13 @@ pub mod openbmp; /// An OpenBMP `raw_bmp` message contains a [OpenBmpHeader][OpenBmpHeader] and a [BmpMessage]. /// /// [OpenBmpHeader]: crate::parser::bmp::openbmp::OpenBmpHeader -pub fn parse_openbmp_msg(mut data: Bytes) -> Result { +pub fn parse_openbmp_msg(mut data: &[u8]) -> Result { let _header = parse_openbmp_header(&mut data)?; parse_bmp_msg(&mut data) } /// Parse a BMP message. -pub fn parse_bmp_msg(data: &mut Bytes) -> Result { +pub fn parse_bmp_msg(data: &mut &[u8]) -> Result { let common_header = parse_bmp_common_header(data)?; // let mut buffer ; @@ -40,7 +39,7 @@ pub fn parse_bmp_msg(data: &mut Bytes) -> Result { match &common_header.msg_type { BmpMsgType::RouteMonitoring => { let per_peer_header = parse_per_peer_header(data)?; - let msg = parse_route_monitoring(data, &per_peer_header.peer_flags.asn_length())?; + let msg = parse_route_monitoring(data, per_peer_header.peer_flags.asn_length())?; Ok(BmpMessage { common_header, per_peer_header: Some(per_peer_header), @@ -49,7 +48,7 @@ pub fn parse_bmp_msg(data: &mut Bytes) -> Result { } BmpMsgType::RouteMirroringMessage => { let per_peer_header = parse_per_peer_header(data)?; - let msg = parse_route_mirroring(data, &per_peer_header.peer_flags.asn_length())?; + let msg = parse_route_mirroring(data, per_peer_header.peer_flags.asn_length())?; Ok(BmpMessage { common_header, per_peer_header: Some(per_peer_header), @@ -112,7 +111,7 @@ mod tests { fn test_peer_down_notification() { let input = "4f424d500107006400000033800c6184b9c2000c602cbf4f072f3ae149d23486024bc3dadfc4000a69732d63632d626d7031c677060bdd020a9e92be000200de2e3180df3369000000000000000000000000000c726f7574652d76696577733500000001030000003302000000000000000000000000000000000000000000003fda060e00000da30000000061523c36000c0e1c0200000a"; let 
decoded = hex::decode(input).unwrap(); - let mut data = Bytes::from(decoded); + let mut data = &decoded[..]; let _header = parse_openbmp_header(&mut data).unwrap(); let _msg = parse_bmp_msg(&mut data).unwrap(); } @@ -121,7 +120,7 @@ mod tests { fn test_route_monitoring() { let input = "4f424d500107005c000000b0800c618881530002f643fef880938d19e9d632c815d1e95a87e1000a69732d61682d626d7031eb4de4e596b282c6a995b067df4abc8cc342f19200000000000000000000000000046c696e780000000103000000b00000c00000000000000000200107f800040000000000001aae000400001aae5474800e02dddf5d00000000ffffffffffffffffffffffffffffffff00800200000069400101005002001602050000192f00001aae0000232a000328eb00032caec008181aae42681aae44581aae464f1aae59d91aae866543000000900e002c00020120200107f800040000000000001aae0004fe8000000000000082711ffffe7f29f100302a0fca8000010a"; let decoded = hex::decode(input).unwrap(); - let mut data = Bytes::from(decoded); + let mut data = &decoded[..]; let _header = parse_openbmp_header(&mut data).unwrap(); let _msg = parse_bmp_msg(&mut data).unwrap(); } diff --git a/src/parser/bmp/openbmp.rs b/src/parser/bmp/openbmp.rs index 92a941b..6cb5e6c 100644 --- a/src/parser/bmp/openbmp.rs +++ b/src/parser/bmp/openbmp.rs @@ -1,6 +1,5 @@ use crate::parser::bmp::error::ParserBmpError; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; use std::net::IpAddr; /// @@ -57,7 +56,7 @@ pub struct OpenBmpHeader { pub router_group: Option, } -pub fn parse_openbmp_header(data: &mut Bytes) -> Result { +pub fn parse_openbmp_header(data: &mut &[u8]) -> Result { // read magic number let magic_number = data.read_n_bytes_to_string(4)?; if magic_number != "OBMP" { @@ -94,7 +93,7 @@ pub fn parse_openbmp_header(data: &mut Bytes) -> Result 255 { name_len = 255; @@ -102,12 +101,12 @@ pub fn parse_openbmp_header(data: &mut Bytes) -> Result Option { - if let Ok(t) = time_str.parse::() { - return chrono::NaiveDateTime::from_timestamp_opt(t as i64, 0); +fn parse_time_str(time_str: &str) -> chrono::ParseResult { + if let 
Ok(unix_timestamp) = time_str.parse::() { + return Ok(unix_timestamp); } - if let Ok(t) = chrono::DateTime::parse_from_rfc3339(time_str) { - return Some(t.naive_utc()); - } - None + + DateTime::parse_from_rfc3339(time_str).map(|x| x.naive_utc().timestamp() as f64) } +/// Constructors provided for some backwards compatability with the removed `Filter::new`. impl Filter { - pub fn new(filter_type: &str, filter_value: &str) -> Result { - match filter_type { - "origin_asn" => match u32::from_str(filter_value) { - Ok(v) => Ok(Filter::OriginAsn(v)), - Err(_) => Err(FilterError(format!( - "cannot parse origin asn from {}", - filter_value - ))), - }, - "prefix" => match IpNet::from_str(filter_value) { - Ok(v) => Ok(Filter::Prefix(v, PrefixMatchType::Exact)), - Err(_) => Err(FilterError(format!( - "cannot parse prefix from {}", - filter_value - ))), - }, - "prefix_super" => match IpNet::from_str(filter_value) { - Ok(v) => Ok(Filter::Prefix(v, PrefixMatchType::IncludeSuper)), - Err(_) => Err(FilterError(format!( - "cannot parse prefix from {}", - filter_value - ))), - }, - "prefix_sub" => match IpNet::from_str(filter_value) { - Ok(v) => Ok(Filter::Prefix(v, PrefixMatchType::IncludeSub)), - Err(_) => Err(FilterError(format!( - "cannot parse prefix from {}", - filter_value - ))), - }, - "prefix_super_sub" => match IpNet::from_str(filter_value) { - Ok(v) => Ok(Filter::Prefix(v, PrefixMatchType::IncludeSuperSub)), - Err(_) => Err(FilterError(format!( - "cannot parse prefix from {}", - filter_value - ))), - }, - "peer_ip" => match IpAddr::from_str(filter_value) { - Ok(v) => Ok(Filter::PeerIp(v)), - Err(_) => Err(FilterError(format!( - "cannot parse peer IP from {}", - filter_value - ))), - }, - "peer_ips" => { - let mut ips = vec![]; - for ip_str in filter_value.replace(' ', "").split(',') { - match IpAddr::from_str(ip_str) { - Ok(v) => ips.push(v), - Err(_) => { - return Err(FilterError(format!( - "cannot parse peer IP from {}", - ip_str - ))) - } - } - } - 
Ok(Filter::PeerIps(ips)) - } - "peer_asn" => match u32::from_str(filter_value) { - Ok(v) => Ok(Filter::PeerAsn(v)), - Err(_) => Err(FilterError(format!( - "cannot parse peer asn from {}", - filter_value - ))), - }, - "type" => match filter_value { - "w" | "withdraw" | "withdrawal" => Ok(Filter::Type(ElemType::WITHDRAW)), - "a" | "announce" | "announcement" => Ok(Filter::Type(ElemType::ANNOUNCE)), - _ => Err(FilterError(format!( - "cannot parse elem type from {}", - filter_value - ))), - }, - "ts_start" | "start_ts" => match parse_time_str(filter_value) { - Some(t) => Ok(Filter::TsStart(t.timestamp() as f64)), - None => Err(FilterError(format!( - "cannot parse TsStart filter from {}", - filter_value - ))), - }, - "ts_end" | "end_ts" => match parse_time_str(filter_value) { - Some(t) => Ok(Filter::TsEnd(t.timestamp() as f64)), - None => Err(FilterError(format!( - "cannot parse TsEnd filter from {}", - filter_value - ))), - }, - "as_path" => match Regex::from_str(filter_value) { - Ok(v) => Ok(Filter::AsPath(v)), - Err(_) => Err(FilterError(format!( - "cannot parse AS path regex from {}", - filter_value - ))), - }, - _ => Err(FilterError(format!("unknown filter type: {}", filter_type))), - } + pub fn ts_start(time_str: &str) -> chrono::ParseResult { + parse_time_str(time_str).map(Filter::TsStart) + } + + pub fn ts_end(time_str: &str) -> chrono::ParseResult { + parse_time_str(time_str).map(Filter::TsEnd) + } + + #[allow(clippy::wrong_self_convention)] + pub fn as_path(regex: &str) -> Result { + Regex::new(regex).map(Filter::AsPath) + } + + pub fn prefix(prefix: &str) -> Result { + IpNet::from_str(prefix).map(|prefix| Filter::Prefix(prefix, PrefixMatchType::Exact)) } } @@ -205,54 +123,17 @@ pub trait Filterable { fn match_filters(&self, filters: &[Filter]) -> bool; } -const fn same_family(prefix_1: &IpNet, prefix_2: &IpNet) -> bool { - matches!( - (prefix_1, prefix_2), - (IpNet::V4(_), IpNet::V4(_)) | (IpNet::V6(_), IpNet::V6(_)) - ) -} - fn prefix_match(match_prefix: 
&IpNet, input_prefix: &IpNet, t: &PrefixMatchType) -> bool { - let exact = input_prefix.eq(match_prefix); + if input_prefix == match_prefix { + return true; + } + match t { - PrefixMatchType::Exact => exact, - PrefixMatchType::IncludeSuper => { - if exact { - exact - } else if !same_family(match_prefix, input_prefix) { - // version not match - false - } else { - // input_prefix is super prefix of match_prefix - match_prefix.addr() >= input_prefix.addr() - && match_prefix.broadcast() <= input_prefix.broadcast() - } - } - PrefixMatchType::IncludeSub => { - if exact { - exact - } else if !same_family(match_prefix, input_prefix) { - // version not match - false - } else { - // input_prefix is sub prefix of match_prefix - match_prefix.addr() <= input_prefix.addr() - && match_prefix.broadcast() >= input_prefix.broadcast() - } - } + PrefixMatchType::Exact => false, + PrefixMatchType::IncludeSuper => input_prefix.contains(match_prefix), + PrefixMatchType::IncludeSub => match_prefix.contains(input_prefix), PrefixMatchType::IncludeSuperSub => { - if exact { - exact - } else if !same_family(match_prefix, input_prefix) { - // version not match - false - } else { - // input_prefix is super prefix of match_prefix - (match_prefix.addr() >= input_prefix.addr() - && match_prefix.broadcast() <= input_prefix.broadcast()) - || (match_prefix.addr() <= input_prefix.addr() - && match_prefix.broadcast() >= input_prefix.broadcast()) - } + input_prefix.contains(match_prefix) || match_prefix.contains(input_prefix) } } } @@ -295,13 +176,16 @@ mod tests { use super::*; use crate::BgpkitParser; use anyhow::Result; + use chrono::NaiveDateTime; + use itertools::Itertools; + use std::net::{Ipv4Addr, Ipv6Addr}; use std::str::FromStr; #[test] fn test_filter() { let url = "https://spaces.bgpkit.org/parser/update-example.gz"; let parser = BgpkitParser::new(url).unwrap(); - let elems = parser.into_elem_iter().collect::>(); + let elems: Vec = parser.into_elem_iter().try_collect().unwrap(); let filters = 
vec![Filter::PeerIp(IpAddr::from_str("185.1.8.65").unwrap())]; let count = elems.iter().filter(|e| e.match_filters(&filters)).count(); @@ -383,29 +267,31 @@ mod tests { #[test] fn test_parsing_time_str() { - let ts = chrono::NaiveDateTime::from_str("2021-11-20T19:49:58").unwrap(); - assert_eq!(parse_time_str("1637437798"), Some(ts)); - assert_eq!(parse_time_str("2021-11-20T19:49:58Z"), Some(ts)); - assert_eq!(parse_time_str("2021-11-20T19:49:58+00:00"), Some(ts)); - - assert_eq!(parse_time_str("2021-11-20T19:49:58"), None); - assert_eq!(parse_time_str("2021-11-20T19:49:58ZDXV"), None); - assert_eq!(parse_time_str("2021-11-20 19:49:58"), None); - assert_eq!(parse_time_str("2021-11-20"), None); + let ts = NaiveDateTime::from_str("2021-11-20T19:49:58") + .unwrap() + .timestamp() as f64; + assert_eq!(parse_time_str("1637437798").ok(), Some(ts)); + assert_eq!(parse_time_str("2021-11-20T19:49:58Z").ok(), Some(ts)); + assert_eq!(parse_time_str("2021-11-20T19:49:58+00:00").ok(), Some(ts)); + + assert_eq!(parse_time_str("2021-11-20T19:49:58").ok(), None); + assert_eq!(parse_time_str("2021-11-20T19:49:58ZDXV").ok(), None); + assert_eq!(parse_time_str("2021-11-20 19:49:58").ok(), None); + assert_eq!(parse_time_str("2021-11-20").ok(), None); } #[test] fn test_filter_iter() -> Result<()> { let url = "https://spaces.bgpkit.org/parser/update-example.gz"; let parser = BgpkitParser::new(url)? - .add_filter("peer_ip", "185.1.8.50")? - .add_filter("type", "w")?; + .add_filter(Filter::PeerIp(Ipv4Addr::new(185, 1, 8, 50).into())) + .add_filter(Filter::Type(ElemType::WITHDRAW)); let count = parser.into_elem_iter().count(); assert_eq!(count, 39); let parser = BgpkitParser::new(url)? - .add_filter("ts_start", "1637437798")? - .add_filter("ts_end", "2021-11-20T19:49:58Z")?; + .add_filter(Filter::ts_start("1637437798")?) 
+ .add_filter(Filter::ts_end("2021-11-20T19:49:58Z")?); let count = parser.into_elem_iter().count(); assert_eq!(count, 13); Ok(()) @@ -416,8 +302,10 @@ mod tests { let url = "https://spaces.bgpkit.org/parser/update-example.gz"; let parser = BgpkitParser::new(url) .unwrap() - .add_filter("peer_ips", "185.1.8.65, 2001:7f8:73:0:3:fa4:0:1") - .unwrap(); + .add_filter(Filter::PeerIps(vec![ + Ipv4Addr::new(185, 1, 8, 65).into(), + Ipv6Addr::from([0x2001, 0x7f8, 0x73, 0x0, 0x3, 0xfa4, 0x0, 0x1]).into(), + ])); let count = parser.into_elem_iter().count(); assert_eq!(count, 3393 + 834); } diff --git a/src/parser/iters.rs b/src/parser/iters.rs index dd6f193..b69622b 100644 --- a/src/parser/iters.rs +++ b/src/parser/iters.rs @@ -3,14 +3,14 @@ Provides parser iterator implementation. */ use crate::error::ParserError; use crate::models::*; +use crate::mrt_record::try_parse_mrt_record_with_buffer; use crate::parser::BgpkitParser; use crate::{Elementor, Filterable}; -use log::{error, warn}; use std::io::Read; /// Use [ElemIterator] as the default iterator to return [BgpElem]s instead of [MrtRecord]s. impl IntoIterator for BgpkitParser { - type Item = BgpElem; + type Item = Result; type IntoIter = ElemIterator; fn into_iter(self) -> Self::IntoIter { @@ -35,6 +35,8 @@ pub struct RecordIterator { pub parser: BgpkitParser, pub count: u64, elementor: Elementor, + had_fatal_error: bool, + buffer: Vec, } impl RecordIterator { @@ -43,86 +45,48 @@ impl RecordIterator { parser, count: 0, elementor: Elementor::new(), + had_fatal_error: false, + buffer: Vec::with_capacity(4096), } } } impl Iterator for RecordIterator { - type Item = MrtRecord; + type Item = Result; + + fn next(&mut self) -> Option { + if self.had_fatal_error { + return None; + } - fn next(&mut self) -> Option { - self.count += 1; loop { - return match self.parser.next_record() { - Ok(v) => { - // if None, the reaches EoF. 
- let filters = &self.parser.filters; - if filters.is_empty() { - Some(v) - } else { - if let MrtMessage::TableDumpV2Message(TableDumpV2Message::PeerIndexTable( - _, - )) = &v.message - { - let _ = self.elementor.record_to_elems(v.clone()); - return Some(v); - } - let elems = self.elementor.record_to_elems(v.clone()); - if elems.iter().any(|e| e.match_filters(&self.parser.filters)) { - Some(v) - } else { - continue; - } + self.count += 1; + let record = + match try_parse_mrt_record_with_buffer(&mut self.parser.reader, &mut self.buffer) { + Ok(None) => return None, + Ok(Some(v)) => v, + Err(err @ (ParserError::IoError(_) | ParserError::UnrecognizedMrtType(_))) => { + self.had_fatal_error = true; + return Some(Err(err)); } - } - Err(e) => { - match e.error { - ParserError::TruncatedMsg(err_str) | ParserError::Unsupported(err_str) => { - if self.parser.options.show_warnings { - warn!("parser warn: {}", err_str); - } - if let Some(bytes) = e.bytes { - std::fs::write("mrt_core_dump", bytes) - .expect("Unable to write to mrt_core_dump"); - } - continue; - } - ParserError::ParseError(err_str) => { - error!("parser error: {}", err_str); - if self.parser.core_dump { - if let Some(bytes) = e.bytes { - std::fs::write("mrt_core_dump", bytes) - .expect("Unable to write to mrt_core_dump"); - } - None - } else { - continue; - } - } - ParserError::EofExpected => { - // normal end of file - None - } - ParserError::IoError(err) | ParserError::EofError(err) => { - // when reaching IO error, stop iterating - error!("{:?}", err); - if self.parser.core_dump { - if let Some(bytes) = e.bytes { - std::fs::write("mrt_core_dump", bytes) - .expect("Unable to write to mrt_core_dump"); - } - } - None - } - ParserError::OneIoError(_) - | ParserError::FilterError(_) - | ParserError::IoNotEnoughBytes() => { - // this should not happen at this stage - None - } - } - } - }; + Err(err) => return Some(Err(err)), + }; + + if self.parser.filters.is_empty() { + return Some(Ok(record)); + } + + if let 
MrtMessage::TableDumpV2Message(TableDumpV2Message::PeerIndexTable(_)) = + &record.message + { + let _ = self.elementor.record_to_elems(record.clone()); + return Some(Ok(record)); + } + + let elems = self.elementor.record_to_elems(record.clone()); + if elems.iter().any(|e| e.match_filters(&self.parser.filters)) { + return Some(Ok(record)); + } } } } @@ -132,64 +96,53 @@ BgpElem Iterator **********/ pub struct ElemIterator { - cache_elems: Vec, - record_iter: RecordIterator, + parser: BgpkitParser, + element_queue: Vec, elementor: Elementor, - count: u64, + had_fatal_error: bool, + buffer: Vec, } impl ElemIterator { fn new(parser: BgpkitParser) -> Self { ElemIterator { - record_iter: RecordIterator::new(parser), - count: 0, - cache_elems: vec![], + parser, + element_queue: vec![], elementor: Elementor::new(), + had_fatal_error: false, + buffer: Vec::with_capacity(4096), } } } impl Iterator for ElemIterator { - type Item = BgpElem; - - fn next(&mut self) -> Option { - self.count += 1; + type Item = Result; + fn next(&mut self) -> Option { loop { - if self.cache_elems.is_empty() { - // refill cache elems - loop { - match self.record_iter.next() { - None => { - // no more records - return None; - } - Some(r) => { - let mut elems = self.elementor.record_to_elems(r); - if elems.is_empty() { - // somehow this record does not contain any elems, continue to parse next record - continue; - } else { - elems.reverse(); - self.cache_elems = elems; - break; - } - } - } - } - // when reaching here, the `self.cache_elems` has been refilled with some more elems + if let Some(element) = self.element_queue.pop() { + return Some(Ok(element)); } - // popping cached elems. note that the original elems order is preseved by reversing the - // vector before putting it on to cache_elems. 
- let elem = self.cache_elems.pop(); - match elem { - None => return None, - Some(e) => match e.match_filters(&self.record_iter.parser.filters) { - true => return Some(e), - false => continue, - }, + if self.had_fatal_error { + return None; } + + let record = + match try_parse_mrt_record_with_buffer(&mut self.parser.reader, &mut self.buffer) { + Ok(None) => return None, + Ok(Some(v)) => v, + Err(err @ (ParserError::IoError(_) | ParserError::UnrecognizedMrtType(_))) => { + self.had_fatal_error = true; + return Some(Err(err)); + } + Err(err) => return Some(Err(err)), + }; + + let new_elements = self.elementor.record_to_elems(record); + self.element_queue.extend(new_elements.into_iter().rev()); + self.element_queue + .retain(|element| element.match_filters(&self.parser.filters)); } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f2f700f..321370e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -21,9 +21,9 @@ pub(crate) use mrt::{parse_bgp4mp, parse_table_dump_message, parse_table_dump_v2 use crate::models::MrtRecord; use filter::Filter; pub use mrt::mrt_elem::Elementor; -use oneio::{get_cache_reader, get_reader}; +use oneio::{get_cache_reader, get_reader, OneIoError}; -pub use crate::error::{ParserError, ParserErrorWithBytes}; +pub use crate::error::ParserError; pub use bmp::{parse_bmp_msg, parse_openbmp_header, parse_openbmp_msg}; pub use filter::*; pub use iters::*; @@ -52,7 +52,7 @@ impl Default for ParserOptions { impl BgpkitParser> { /// Creating a new parser from a object that implements [Read] trait. - pub fn new(path: &str) -> Result { + pub fn new(path: &str) -> Result { let reader = get_reader(path)?; Ok(BgpkitParser { reader, @@ -67,7 +67,7 @@ impl BgpkitParser> { /// The cache file name is generated by the following format: `cache--`. 
/// For example, the remote file `http://archive.routeviews.org/route-views.chile/bgpdata/2023.03/RIBS/rib.20230326.0600.bz2` /// will be cached as `cache-682cb1eb-rib.20230326.0600.bz2` in the cache directory. - pub fn new_cached(path: &str, cache_dir: &str) -> Result { + pub fn new_cached(path: &str, cache_dir: &str) -> Result { let file_name = path.rsplit('/').next().unwrap().to_string(); let new_file_name = format!( "cache-{}", @@ -96,6 +96,36 @@ fn add_suffix_to_filename(filename: &str, suffix: &str) -> String { } } +/// A CRC32 implementation that converts a string to a hex string. +/// +/// CRC32 is a checksum algorithm that is used to verify the integrity of data. It is short in +/// length and sufficient for generating unique file names based on remote URLs. +pub fn crc32(input: &str) -> String { + let input_bytes = input.as_bytes(); + let mut table = [0u32; 256]; + let polynomial = 0xedb88320u32; + + for i in 0..256 { + let mut crc = i as u32; + for _ in 0..8 { + if crc & 1 == 1 { + crc = (crc >> 1) ^ polynomial; + } else { + crc >>= 1; + } + } + table[i as usize] = crc; + } + + let mut crc = !0u32; + for byte in input_bytes.iter() { + let index = ((crc ^ (*byte as u32)) & 0xff) as usize; + crc = (crc >> 8) ^ table[index]; + } + + format!("{:08x}", !crc) +} + impl BgpkitParser { /// Creating a new parser from a object that implements [Read] trait. 
pub fn from_reader(reader: R) -> Self { @@ -108,8 +138,8 @@ impl BgpkitParser { } /// This is used in for loop `for item in parser{}` - pub fn next_record(&mut self) -> Result { - parse_mrt_record(&mut self.reader) + pub fn next_record(&mut self) -> Result, ParserError> { + try_parse_mrt_record(&mut self.reader) } } @@ -134,19 +164,9 @@ impl BgpkitParser { } } - pub fn add_filter( - self, - filter_type: &str, - filter_value: &str, - ) -> Result { - let mut filters = self.filters; - filters.push(Filter::new(filter_type, filter_value)?); - Ok(BgpkitParser { - reader: self.reader, - core_dump: self.core_dump, - filters, - options: self.options, - }) + pub fn add_filter(mut self, filter: Filter) -> Self { + self.filters.push(filter); + self } } diff --git a/src/parser/mrt/messages/bgp4mp.rs b/src/parser/mrt/messages/bgp4mp.rs index 7d1e6ee..fef9278 100644 --- a/src/parser/mrt/messages/bgp4mp.rs +++ b/src/parser/mrt/messages/bgp4mp.rs @@ -2,14 +2,13 @@ use crate::error::ParserError; use crate::models::*; use crate::parser::bgp::messages::parse_bgp_message; use crate::parser::ReadUtils; -use bytes::{Buf, Bytes}; use std::convert::TryFrom; /// Parse MRT BGP4MP type /// /// RFC: /// -pub fn parse_bgp4mp(sub_type: u16, input: Bytes) -> Result { +pub fn parse_bgp4mp(sub_type: u16, input: &[u8]) -> Result { let bgp4mp_type: Bgp4MpType = Bgp4MpType::try_from(sub_type)?; let msg: Bgp4Mp = match bgp4mp_type { @@ -40,7 +39,7 @@ pub fn parse_bgp4mp(sub_type: u16, input: Bytes) -> Result Ok(msg) } -fn total_should_read(afi: &Afi, asn_len: &AsnLength, total_size: usize) -> usize { +fn total_should_read(afi: &Afi, asn_len: AsnLength, total_size: usize) -> usize { let ip_size = match afi { Afi::Ipv4 => 4 * 2, Afi::Ipv6 => 16 * 2, @@ -67,7 +66,7 @@ fn total_should_read(afi: &Afi, asn_len: &AsnLength, total_size: usize) -> usize +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ pub fn parse_bgp4mp_message( - mut data: Bytes, + mut data: &[u8], add_path: bool, asn_len: 
AsnLength, msg_type: &Bgp4MpType, @@ -81,15 +80,9 @@ pub fn parse_bgp4mp_message( let peer_ip = data.read_address(&afi)?; let local_ip = data.read_address(&afi)?; - let should_read = total_should_read(&afi, &asn_len, total_size); - if should_read != data.remaining() { - return Err(ParserError::TruncatedMsg(format!( - "truncated bgp4mp message: should read {} bytes, have {} bytes available", - should_read, - data.remaining() - ))); - } - let bgp_message: BgpMessage = parse_bgp_message(&mut data, add_path, &asn_len)?; + let should_read = total_should_read(&afi, asn_len, total_size); + data.expect_remaining_eq(should_read, "bgp4mp message")?; + let bgp_message: BgpMessage = parse_bgp_message(&mut data, add_path, asn_len)?; Ok(Bgp4MpMessage { msg_type: *msg_type, @@ -134,7 +127,7 @@ pub fn parse_bgp4mp_message( +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ pub fn parse_bgp4mp_state_change( - mut input: Bytes, + mut input: &[u8], asn_len: AsnLength, msg_type: &Bgp4MpType, ) -> Result { diff --git a/src/parser/mrt/messages/table_dump_message.rs b/src/parser/mrt/messages/table_dump_message.rs index 46e7d55..1cda966 100644 --- a/src/parser/mrt/messages/table_dump_message.rs +++ b/src/parser/mrt/messages/table_dump_message.rs @@ -2,7 +2,6 @@ use crate::error::*; use crate::models::*; use crate::parser::bgp::attributes::AttributeParser; use crate::parser::ReadUtils; -use bytes::Bytes; use std::net::IpAddr; /// Parse MRT TABLE_DUMP type message. @@ -36,7 +35,7 @@ use std::net::IpAddr; /// ``` pub fn parse_table_dump_message( sub_type: u16, - mut data: Bytes, + mut data: &[u8], ) -> Result { // #### // Step 0. prepare @@ -45,16 +44,7 @@ pub fn parse_table_dump_message( // - create data slice reader cursor // determine address family based on the sub_type value defined in the MRT [CommonHeader]. 
- let afi = match sub_type { - 1 => Afi::Ipv4, - 2 => Afi::Ipv6, - _ => { - return Err(ParserError::ParseError(format!( - "Invalid subtype found for TABLE_DUMP (V1) message: {}", - sub_type - ))) - } - }; + let afi = Afi::try_from(sub_type)?; // #### // Step 1. read simple fields @@ -90,12 +80,12 @@ pub fn parse_table_dump_message( let attr_parser = AttributeParser::new(false); - data.has_n_remaining(attribute_length)?; - let attr_data_slice = data.split_to(attribute_length); + data.require_n_remaining(attribute_length, "rib attributes")?; + let attr_data_slice = data.split_to(attribute_length)?; // for TABLE_DUMP type, the AS number length is always 2-byte. let attributes = - attr_parser.parse_attributes(attr_data_slice, &AsnLength::Bits16, None, None, None)?; + attr_parser.parse_attributes(attr_data_slice, AsnLength::Bits16, None, None, None)?; Ok(TableDumpMessage { view_number, diff --git a/src/parser/mrt/messages/table_dump_v2_message.rs b/src/parser/mrt/messages/table_dump_v2_message.rs index 8febcd4..6829ab9 100644 --- a/src/parser/mrt/messages/table_dump_v2_message.rs +++ b/src/parser/mrt/messages/table_dump_v2_message.rs @@ -1,8 +1,6 @@ use crate::error::ParserError; use crate::models::*; use crate::parser::{AttributeParser, ReadUtils}; -use bytes::{Buf, Bytes}; -use log::warn; use std::collections::HashMap; use std::convert::TryFrom; use std::net::{IpAddr, Ipv4Addr}; @@ -21,7 +19,7 @@ use std::net::{IpAddr, Ipv4Addr}; /// pub fn parse_table_dump_v2_message( sub_type: u16, - mut input: Bytes, + mut input: &[u8], ) -> Result { let v2_type: TableDumpV2Type = TableDumpV2Type::try_from(sub_type)?; @@ -43,9 +41,10 @@ pub fn parse_table_dump_v2_message( TableDumpV2Type::RibGeneric | TableDumpV2Type::RibGenericAddPath | TableDumpV2Type::GeoPeerTable => { - return Err(ParserError::Unsupported( - "TableDumpV2 RibGeneric and GeoPeerTable is not currently supported".to_string(), - )) + return Err(ParserError::UnsupportedMrtType { + mrt_type: EntryType::TABLE_DUMP_V2, 
+ subtype: sub_type, + }); } }; @@ -55,8 +54,8 @@ pub fn parse_table_dump_v2_message( /// Peer index table /// /// RFC: https://www.rfc-editor.org/rfc/rfc6396#section-4.3.1 -pub fn parse_peer_index_table(mut data: Bytes) -> Result { - let collector_bgp_id = Ipv4Addr::from(data.read_u32()?); +pub fn parse_peer_index_table(mut data: &[u8]) -> Result { + let collector_bgp_id = data.read_ipv4_address()?; // read and ignore view name let view_name_length = data.read_u16()?; let view_name = @@ -105,34 +104,26 @@ pub fn parse_peer_index_table(mut data: Bytes) -> Result Result { - let afi: Afi; - let safi: Safi; - match rib_type { + let (afi, safi) = match rib_type { TableDumpV2Type::RibIpv4Unicast | TableDumpV2Type::RibIpv4UnicastAddPath => { - afi = Afi::Ipv4; - safi = Safi::Unicast + (Afi::Ipv4, Safi::Unicast) } TableDumpV2Type::RibIpv4Multicast | TableDumpV2Type::RibIpv4MulticastAddPath => { - afi = Afi::Ipv4; - safi = Safi::Multicast + (Afi::Ipv4, Safi::Multicast) } TableDumpV2Type::RibIpv6Unicast | TableDumpV2Type::RibIpv6UnicastAddPath => { - afi = Afi::Ipv6; - safi = Safi::Unicast + (Afi::Ipv6, Safi::Unicast) } TableDumpV2Type::RibIpv6Multicast | TableDumpV2Type::RibIpv6MulticastAddPath => { - afi = Afi::Ipv6; - safi = Safi::Multicast - } - _ => { - return Err(ParserError::ParseError(format!( - "wrong RIB type for parsing: {:?}", - rib_type - ))) + (Afi::Ipv6, Safi::Multicast) } + ty => panic!( + "Invalid TableDumpV2Type {:?} passed to parse_rib_afi_entries", + ty + ), }; let add_path = matches!( @@ -148,7 +139,6 @@ pub fn parse_rib_afi_entries( // NOTE: here we parse the prefix as only length and prefix, the path identifier for add_path // entry is not handled here. 
We follow RFC6396 here https://www.rfc-editor.org/rfc/rfc6396.html#section-4.3.2 let prefix = data.read_nlri_prefix(&afi, false)?; - let prefixes = vec![prefix]; let entry_count = data.read_u16()?; let mut rib_entries = Vec::with_capacity((entry_count * 2) as usize); @@ -157,14 +147,7 @@ pub fn parse_rib_afi_entries( // let attr_data_slice = &input.into_inner()[(input.position() as usize)..]; for _i in 0..entry_count { - let entry = match parse_rib_entry(data, add_path, &afi, &safi, &prefixes) { - Ok(entry) => entry, - Err(e) => { - warn!("early break due to error {}", e.to_string()); - break; - } - }; - rib_entries.push(entry); + rib_entries.push(parse_rib_entry(data, add_path, afi, safi, &prefix)?); } Ok(RibAfiEntries { @@ -176,38 +159,35 @@ pub fn parse_rib_afi_entries( } pub fn parse_rib_entry( - input: &mut Bytes, + input: &mut &[u8], add_path: bool, - afi: &Afi, - safi: &Safi, - prefixes: &[NetworkPrefix], + afi: Afi, + safi: Safi, + prefix: &NetworkPrefix, ) -> Result { - if input.remaining() < 8 { - // total length - current position less than 16 -- - // meaning less than 16 bytes available to read - return Err(ParserError::TruncatedMsg("truncated msg".to_string())); - } + // total length - current position less than 16 -- + // meaning less than 16 bytes available to read + input.require_n_remaining(8, "rib entry")?; let peer_index = input.read_u16()?; let originated_time = input.read_u32()?; if add_path { + // TODO: Why is this value unused? let _path_id = input.read_u32()?; } let attribute_length = input.read_u16()? 
as usize; - if input.remaining() < attribute_length { - return Err(ParserError::TruncatedMsg("truncated msg".to_string())); - } + input.require_n_remaining(attribute_length, "rib entry attributes")?; let attr_parser = AttributeParser::new(add_path); - let attr_data_slice = input.split_to(attribute_length); + let attr_data_slice = input.split_to(attribute_length)?; let attributes = attr_parser.parse_attributes( attr_data_slice, - &AsnLength::Bits32, - Some(*afi), - Some(*safi), - Some(prefixes), + AsnLength::Bits32, + Some(afi), + Some(safi), + Some(prefix), )?; Ok(RibEntry { diff --git a/src/parser/mrt/mod.rs b/src/parser/mrt/mod.rs index dff117d..6836259 100644 --- a/src/parser/mrt/mod.rs +++ b/src/parser/mrt/mod.rs @@ -10,4 +10,4 @@ pub mod mrt_record; pub use messages::bgp4mp::parse_bgp4mp; pub use messages::table_dump_message::parse_table_dump_message; pub use messages::table_dump_v2_message::parse_table_dump_v2_message; -pub use mrt_record::parse_mrt_record; +pub use mrt_record::{parse_mrt_record, try_parse_mrt_record}; diff --git a/src/parser/mrt/mrt_elem.rs b/src/parser/mrt/mrt_elem.rs index 3a2a72b..34f0635 100644 --- a/src/parser/mrt/mrt_elem.rs +++ b/src/parser/mrt/mrt_elem.rs @@ -40,8 +40,8 @@ fn get_relevant_attributes( Option>, bool, Option<(Asn, BgpIdentifier)>, - Option, - Option, + Option, + Option, Option, Option>, Option>, @@ -222,7 +222,7 @@ impl Elementor { })); if let Some(nlri) = announced { - elems.extend(nlri.prefixes.into_iter().map(|p| BgpElem { + elems.extend(nlri.into_iter_with_path_id().map(|p| BgpElem { timestamp, elem_type: ElemType::ANNOUNCE, peer_ip: *peer_ip, @@ -265,7 +265,7 @@ impl Elementor { deprecated: None, })); if let Some(nlri) = withdrawn { - elems.extend(nlri.prefixes.into_iter().map(|p| BgpElem { + elems.extend(nlri.into_iter_with_path_id().map(|p| BgpElem { timestamp, elem_type: ElemType::WITHDRAW, peer_ip: *peer_ip, @@ -388,23 +388,7 @@ impl Elementor { }; let next = match next_hop { - None => { - if let Some(v) = 
announced { - if let Some(h) = v.next_hop { - match h { - NextHopAddress::Ipv4(v) => Some(IpAddr::from(v)), - NextHopAddress::Ipv6(v) => Some(IpAddr::from(v)), - NextHopAddress::Ipv6LinkLocal(v, _) => { - Some(IpAddr::from(v)) - } - } - } else { - None - } - } else { - None - } - } + None => announced.as_ref().map(ReachableNlri::next_hop_addr), Some(v) => Some(v), }; diff --git a/src/parser/mrt/mrt_record.rs b/src/parser/mrt/mrt_record.rs index 6c80197..a83db43 100644 --- a/src/parser/mrt/mrt_record.rs +++ b/src/parser/mrt/mrt_record.rs @@ -1,9 +1,8 @@ use crate::error::ParserError; use crate::models::*; use crate::parser::{ - parse_bgp4mp, parse_table_dump_message, parse_table_dump_v2_message, ParserErrorWithBytes, + parse_bgp4mp, parse_table_dump_message, parse_table_dump_v2_message, ReadUtils, }; -use bytes::{Buf, Bytes, BytesMut}; use std::convert::TryFrom; use std::io::Read; @@ -44,20 +43,24 @@ use std::io::Read; pub fn parse_common_header(input: &mut T) -> Result { let mut raw_bytes = [0u8; 12]; input.read_exact(&mut raw_bytes)?; - let mut data = BytesMut::from(&raw_bytes[..]); + let mut data = &raw_bytes[..]; - let timestamp = data.get_u32(); - let entry_type_raw = data.get_u16(); - let entry_type = EntryType::try_from(entry_type_raw)?; - let entry_subtype = data.get_u16(); - let mut length = data.get_u32(); + let timestamp = data.read_u32()?; + let entry_type_raw = data.read_u16()?; + let entry_type = match EntryType::try_from(entry_type_raw) { + Ok(v) => v, + Err(_) => return Err(ParserError::UnrecognizedMrtType(entry_type_raw)), + }; + let entry_subtype = data.read_u16()?; + let mut length = data.read_u32()?; let microsecond_timestamp = match &entry_type { EntryType::BGP4MP_ET => { + // TODO: Error if length < 4 length -= 4; let mut raw_bytes: [u8; 4] = [0; 4]; input.read_exact(&mut raw_bytes)?; - Some(BytesMut::from(&raw_bytes[..]).get_u32()) + Some(u32::from_be_bytes(raw_bytes)) } _ => None, }; @@ -71,66 +74,73 @@ pub fn parse_common_header(input: &mut 
T) -> Result Result { - // parse common header - let common_header = match parse_common_header(input) { - Ok(v) => v, - Err(e) => { - if let ParserError::EofError(e) = &e { - if e.kind() == std::io::ErrorKind::UnexpectedEof { - return Err(ParserErrorWithBytes::from(ParserError::EofExpected)); - } - } - return Err(ParserErrorWithBytes { - error: e, - bytes: None, - }); +/// An alternative to [parse_common_header] which returns `None` if the end of the file is reached +/// upon beginning to read the header. +pub fn try_parse_common_header( + input: &mut T, +) -> Result, ParserError> { + let mut first_byte = [0]; + match input.read(&mut first_byte)? { + 0 => Ok(None), + 1 => { + let mut reader = &first_byte[..]; + parse_common_header(&mut Read::chain(&mut reader, input)).map(Some) } + _ => unreachable!("Can only read 0 or 1 bytes into buffer of length 1 "), + } +} + +pub fn try_parse_mrt_record(input: &mut T) -> Result, ParserError> { + let mut buffer = Vec::new(); + try_parse_mrt_record_with_buffer(input, &mut buffer) +} + +pub fn try_parse_mrt_record_with_buffer( + input: &mut T, + buffer: &mut Vec, +) -> Result, ParserError> { + // parse common header + let common_header = match try_parse_common_header(input)? 
{ + Some(v) => v, + None => return Ok(None), }; // read the whole message bytes to buffer - let mut buffer = BytesMut::with_capacity(common_header.length as usize); - buffer.resize(common_header.length as usize, 0); - match input - .take(common_header.length as u64) - .read_exact(&mut buffer) - { - Ok(_) => {} - Err(e) => { - return Err(ParserErrorWithBytes { - error: ParserError::IoError(e), - bytes: None, - }) - } + if buffer.len() < common_header.length as usize { + buffer.resize(common_header.length as usize, 0); } + input.read_exact(&mut buffer[..common_header.length as usize])?; - match parse_mrt_body( - common_header.entry_type as u16, + let message = parse_mrt_body( + common_header.entry_type, common_header.entry_subtype, - buffer.freeze(), // freeze the BytesMute to Bytes - ) { - Ok(message) => Ok(MrtRecord { - common_header, - message, - }), - Err(e) => { - // TODO: find more efficient way to preserve the bytes during error - // let mut total_bytes = vec![]; - // if common_header.write_header(&mut total_bytes).is_err() { - // unreachable!("Vec will never produce errors when used as a std::io::Write") - // } + &buffer[..common_header.length as usize], + )?; - // total_bytes.extend(buffer); - // Err(ParserErrorWithBytes { - // error: e, - // bytes: Some(total_bytes), - // }) - Err(ParserErrorWithBytes { - error: e, - bytes: None, - }) - } - } + Ok(Some(MrtRecord { + common_header, + message, + })) +} + +pub fn parse_mrt_record(input: &mut T) -> Result { + // parse common header + let common_header = parse_common_header(input)?; + + // read the whole message bytes to buffer + let mut buffer = vec![0; common_header.length as usize]; + input.read_exact(&mut buffer)?; + + let message = parse_mrt_body( + common_header.entry_type, + common_header.entry_subtype, + &buffer, + )?; + + Ok(MrtRecord { + common_header, + message, + }) } /// Parse MRT message body with given entry type and subtype. 
@@ -139,47 +149,29 @@ pub fn parse_mrt_record(input: &mut impl Read) -> Result Result { - let etype = EntryType::try_from(entry_type)?; - - let message: MrtMessage = match &etype { + match entry_type { EntryType::TABLE_DUMP => { - let msg = parse_table_dump_message(entry_subtype, data); - match msg { - Ok(msg) => MrtMessage::TableDumpMessage(msg), - Err(e) => { - return Err(e); - } - } + let msg = parse_table_dump_message(entry_subtype, data)?; + Ok(MrtMessage::TableDumpMessage(msg)) } EntryType::TABLE_DUMP_V2 => { - let msg = parse_table_dump_v2_message(entry_subtype, data); - match msg { - Ok(msg) => MrtMessage::TableDumpV2Message(msg), - Err(e) => { - return Err(e); - } - } + let msg = parse_table_dump_v2_message(entry_subtype, data)?; + Ok(MrtMessage::TableDumpV2Message(msg)) } EntryType::BGP4MP | EntryType::BGP4MP_ET => { - let msg = parse_bgp4mp(entry_subtype, data); - match msg { - Ok(msg) => MrtMessage::Bgp4Mp(msg), - Err(e) => { - return Err(e); - } - } + let msg = parse_bgp4mp(entry_subtype, data)?; + Ok(MrtMessage::Bgp4Mp(msg)) } - v => { + mrt_type => { // deprecated - return Err(ParserError::Unsupported(format!( - "unsupported MRT type: {:?}", - v - ))); + Err(ParserError::UnsupportedMrtType { + mrt_type, + subtype: entry_subtype, + }) } - }; - Ok(message) + } } diff --git a/src/parser/rislive/error.rs b/src/parser/rislive/error.rs index a64687c..dd4c855 100644 --- a/src/parser/rislive/error.rs +++ b/src/parser/rislive/error.rs @@ -1,58 +1,22 @@ -use std::convert; -use std::error::Error; -use std::fmt::{Display, Formatter}; +use crate::ParserError; +use thiserror::Error; -#[derive(Debug)] +#[derive(Debug, Error)] pub enum ParserRisliveError { - IncorrectJson(String), - IncorrectRawBytes, - IrregularRisLiveFormat, - UnsupportedMessage, + #[error(transparent)] + IncorrectJson(#[from] serde_json::Error), + #[error("unable to parse aggregator attribute {0:?}")] + UnableToParseAggregator(String), + #[error("unable to parse raw bytes: {0}")] + 
UnableToParseRawBytes(ParserError), + #[error("unknown message type: {0:?}")] + UnknownMessageType(Option), + #[error("unsupported message type: {0}")] + UnsupportedMessage(String), + #[error("found 'eor' (End of RIB) prefix")] ElemEndOfRibPrefix, - ElemUnknownOriginType(String), - ElemIncorrectAggregator(String), - ElemIncorrectPrefix(String), - ElemIncorrectIp(String), + #[error("unknown origin type: {0}")] + UnknownOriginType(String), + #[error("unable to parse prefix: {0:?}")] + UnableToParsePrefix(String), } - -impl Display for ParserRisliveError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - ParserRisliveError::IncorrectJson(msg) => { - write!(f, "incorrect json message: {}", msg) - } - ParserRisliveError::IncorrectRawBytes => { - write!(f, "incorrect raw bytes") - } - ParserRisliveError::UnsupportedMessage => { - write!(f, "unsupported message") - } - ParserRisliveError::IrregularRisLiveFormat => { - write!(f, "irregular ris live format") - } - ParserRisliveError::ElemIncorrectPrefix(msg) => { - write!(f, "incorrect prefix string: {}", msg) - } - ParserRisliveError::ElemUnknownOriginType(msg) => { - write!(f, "unknown origin type: {}", msg) - } - ParserRisliveError::ElemIncorrectAggregator(msg) => { - write!(f, "incorrect aggregator string: {}", msg) - } - ParserRisliveError::ElemIncorrectIp(msg) => { - write!(f, "incorrect IP string: {}", msg) - } - ParserRisliveError::ElemEndOfRibPrefix => { - write!(f, "found 'eor' (End of RIB) prefix") - } - } - } -} - -impl convert::From for ParserRisliveError { - fn from(_: serde_json::Error) -> Self { - ParserRisliveError::IncorrectJson("serde_json error".to_string()) - } -} - -impl Error for ParserRisliveError {} diff --git a/src/parser/rislive/messages/raw_bytes.rs b/src/parser/rislive/messages/raw_bytes.rs index 06c58e8..5cfaf84 100644 --- a/src/parser/rislive/messages/raw_bytes.rs +++ b/src/parser/rislive/messages/raw_bytes.rs @@ -2,7 +2,6 @@ use crate::models::*; use 
crate::parser::bgp::parse_bgp_message; use crate::parser::rislive::error::ParserRisliveError; use crate::Elementor; -use bytes::Bytes; use serde_json::Value; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; @@ -10,21 +9,25 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; pub fn parse_raw_bytes(msg_str: &str) -> Result, ParserRisliveError> { let msg: Value = serde_json::from_str(msg_str)?; let msg_type = match msg.get("type") { - None => return Err(ParserRisliveError::IrregularRisLiveFormat), + None => return Err(ParserRisliveError::UnknownMessageType(None)), Some(t) => t.as_str().unwrap(), }; match msg_type { "ris_message" => {} "ris_error" | "ris_rrc_list" | "ris_subscribe_ok" | "pong" => { - return Err(ParserRisliveError::UnsupportedMessage) + return Err(ParserRisliveError::UnsupportedMessage(msg_type.to_string())) + } + _ => { + return Err(ParserRisliveError::UnknownMessageType(Some( + msg_type.to_string(), + ))) } - _ => return Err(ParserRisliveError::IrregularRisLiveFormat), } let data = msg.get("data").unwrap().as_object().unwrap(); - let mut bytes = Bytes::from(hex::decode(data.get("raw").unwrap().as_str().unwrap()).unwrap()); + let bytes = hex::decode(data.get("raw").unwrap().as_str().unwrap()).unwrap(); let timestamp = data.get("timestamp").unwrap().as_f64().unwrap(); let peer_str = data.get("peer").unwrap().as_str().unwrap().to_owned(); @@ -39,11 +42,11 @@ pub fn parse_raw_bytes(msg_str: &str) -> Result, ParserRisliveError let peer_asn = peer_asn_str.parse::().unwrap(); - let bgp_msg = match parse_bgp_message(&mut bytes, false, &AsnLength::Bits32) { + let bgp_msg = match parse_bgp_message(&mut &bytes[..], false, AsnLength::Bits32) { Ok(m) => m, - Err(_) => match parse_bgp_message(&mut bytes, false, &AsnLength::Bits16) { + Err(_) => match parse_bgp_message(&mut &bytes[..], false, AsnLength::Bits16) { Ok(m) => m, - Err(_) => return Err(ParserRisliveError::IncorrectRawBytes), + Err(err) => return Err(ParserRisliveError::UnableToParseRawBytes(err)), }, }; diff --git 
a/src/parser/rislive/mod.rs b/src/parser/rislive/mod.rs index 917048f..18c9d91 100644 --- a/src/parser/rislive/mod.rs +++ b/src/parser/rislive/mod.rs @@ -48,25 +48,10 @@ use std::net::Ipv4Addr; pub mod error; pub mod messages; -// simple macro to make the code look a bit nicer -macro_rules! unwrap_or_return { - ( $e:expr, $msg_string:expr ) => { - match $e { - Ok(x) => x, - Err(_) => return Err(ParserRisliveError::IncorrectJson($msg_string)), - } - }; -} - /// This function parses one message and returns a result of a vector of [BgpElem]s or an error pub fn parse_ris_live_message(msg_str: &str) -> Result, ParserRisliveError> { - let msg_string = msg_str.to_string(); - // parse RIS Live message to internal struct using serde. - let msg: RisLiveMessage = match serde_json::from_str(msg_str) { - Ok(m) => m, - Err(_e) => return Err(ParserRisliveError::IncorrectJson(msg_string)), - }; + let msg: RisLiveMessage = serde_json::from_str(msg_str)?; match msg { RisLiveMessage::RisMessage(ris_msg) => { @@ -107,7 +92,7 @@ pub fn parse_ris_live_message(msg_str: &str) -> Result, ParserRisli "egp" | "EGP" => Origin::EGP, "incomplete" | "INCOMPLETE" => Origin::INCOMPLETE, other => { - return Err(ParserRisliveError::ElemUnknownOriginType( + return Err(ParserRisliveError::UnknownOriginType( other.to_string(), )); } @@ -120,15 +105,19 @@ pub fn parse_ris_live_message(msg_str: &str) -> Result, ParserRisli Some(aggr_str) => { let (asn_str, ip_str) = match aggr_str.split_once(':') { None => { - return Err(ParserRisliveError::ElemIncorrectAggregator( + return Err(ParserRisliveError::UnableToParseAggregator( aggr_str, )) } Some(v) => v, }; - let asn = unwrap_or_return!(asn_str.parse::(), msg_string); - let ip = unwrap_or_return!(ip_str.parse::(), msg_string); + let asn = asn_str.parse::().map_err(|_| { + ParserRisliveError::UnableToParseAggregator(aggr_str.to_owned()) + })?; + let ip = ip_str.parse::().map_err(|_| { + ParserRisliveError::UnableToParseAggregator(aggr_str) + })?; (Some(asn), 
Some(ip)) } }; @@ -143,7 +132,7 @@ pub fn parse_ris_live_message(msg_str: &str) -> Result, ParserRisli if prefix == "eor" { return Err(ParserRisliveError::ElemEndOfRibPrefix); } - return Err(ParserRisliveError::ElemIncorrectPrefix( + return Err(ParserRisliveError::UnableToParsePrefix( prefix.to_string(), )); } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index 7b10f83..0ce82f0 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -1,110 +1,171 @@ /*! Provides IO utility functions for read bytes of different length and converting to corresponding structs. */ -use ipnet::{IpNet, Ipv4Net, Ipv6Net}; +use ipnet::{IpNet, Ipv4Net, Ipv6Net, PrefixLenError}; use std::convert::TryFrom; -use std::{ - io, - net::{Ipv4Addr, Ipv6Addr}, -}; +use std::net::{Ipv4Addr, Ipv6Addr}; use crate::models::*; -use bytes::{Buf, Bytes}; -use log::debug; use std::net::IpAddr; use crate::error::ParserError; -use crate::ParserError::IoNotEnoughBytes; -impl ReadUtils for Bytes {} +#[cold] +fn eof(name: &'static str, expected: usize, found: usize) -> ParserError { + ParserError::InconsistentFieldLength { + name, + expected, + found, + } +} + +impl ReadUtils for &'_ [u8] { + #[inline] + fn remaining(&self) -> usize { + self.len() + } -// Allow reading IPs from Reads -pub trait ReadUtils: Buf { #[inline] - fn has_n_remaining(&self, n: usize) -> Result<(), ParserError> { - if self.remaining() < n { - Err(IoNotEnoughBytes()) - } else { - Ok(()) + fn advance(&mut self, x: usize) -> Result<(), ParserError> { + if self.len() >= x { + *self = &self[x..]; + return Ok(()); } + + Err(eof("advance", x, self.len())) } #[inline] + fn split_to(&mut self, n: usize) -> Result { + if self.len() >= n { + let (a, b) = self.split_at(n); + *self = b; + return Ok(a); + } + + Err(eof("split_to", n, self.len())) + } + + #[inline(always)] fn read_u8(&mut self) -> Result { - self.has_n_remaining(1)?; - Ok(self.get_u8()) + if !self.is_empty() { + let value = self[0]; + *self = &self[1..]; + return 
Ok(value); + } + + Err(eof("read_u8", 1, 0)) } - #[inline] + #[inline(always)] fn read_u16(&mut self) -> Result { - self.has_n_remaining(2)?; - Ok(self.get_u16()) + if self.len() >= 2 { + let (bytes, remaining) = self.split_at(2); + *self = remaining; + return Ok(u16::from_be_bytes(bytes.try_into().unwrap())); + } + + Err(eof("read_u16", 2, self.len())) } - #[inline] + #[inline(always)] fn read_u32(&mut self) -> Result { - self.has_n_remaining(4)?; - Ok(self.get_u32()) + if self.len() >= 4 { + let (bytes, remaining) = self.split_at(4); + *self = remaining; + return Ok(u32::from_be_bytes(bytes.try_into().unwrap())); + } + + Err(eof("read_u32", 4, self.len())) } - #[inline] + #[inline(always)] fn read_u64(&mut self) -> Result { - self.has_n_remaining(8)?; - Ok(self.get_u64()) + if self.len() >= 8 { + let (bytes, remaining) = self.split_at(8); + *self = remaining; + return Ok(u64::from_be_bytes(bytes.try_into().unwrap())); + } + + Err(eof("read_u64", 8, self.len())) + } + + #[inline] + fn read_exact(&mut self, buffer: &mut [u8]) -> Result<(), ParserError> { + match std::io::Read::read_exact(self, buffer) { + Ok(_) => Ok(()), + Err(_) => Err(eof("read_exact", buffer.len(), self.len())), + } + } +} + +// Allow reading IPs from Reads +pub trait ReadUtils: Sized { + fn remaining(&self) -> usize; + fn advance(&mut self, x: usize) -> Result<(), ParserError>; + fn split_to(&mut self, n: usize) -> Result; + fn read_u8(&mut self) -> Result; + fn read_u16(&mut self) -> Result; + fn read_u32(&mut self) -> Result; + fn read_u64(&mut self) -> Result; + fn read_exact(&mut self, buffer: &mut [u8]) -> Result<(), ParserError>; + + /// Check that the buffer has at least n bytes remaining. This can help the compiler optimize + /// away bounds checks. 
+ #[inline(always)] + fn require_n_remaining(&self, n: usize, target: &'static str) -> Result<(), ParserError> { + if self.remaining() >= n { + return Ok(()); + } + + Err(eof(target, n, self.remaining())) } - fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ParserError> { - self.has_n_remaining(buf.len())?; - self.copy_to_slice(buf); - Ok(()) + #[inline(always)] + fn expect_remaining_eq(&self, n: usize, target: &'static str) -> Result<(), ParserError> { + if self.remaining() == n { + return Ok(()); + } + + Err(ParserError::InconsistentFieldLength { + name: target, + expected: n, + found: self.remaining(), + }) } - fn read_address(&mut self, afi: &Afi) -> io::Result { + fn read_address(&mut self, afi: &Afi) -> Result { match afi { - Afi::Ipv4 => match self.read_ipv4_address() { - Ok(ip) => Ok(IpAddr::V4(ip)), - _ => Err(io::Error::new( - io::ErrorKind::Other, - "Cannot parse IPv4 address".to_string(), - )), - }, - Afi::Ipv6 => match self.read_ipv6_address() { - Ok(ip) => Ok(IpAddr::V6(ip)), - _ => Err(io::Error::new( - io::ErrorKind::Other, - "Cannot parse IPv6 address".to_string(), - )), - }, + Afi::Ipv4 => self.read_ipv4_address().map(IpAddr::V4), + Afi::Ipv6 => self.read_ipv6_address().map(IpAddr::V6), } } fn read_ipv4_address(&mut self) -> Result { + self.require_n_remaining(4, "IPv4 Address")?; let addr = self.read_u32()?; Ok(Ipv4Addr::from(addr)) } fn read_ipv6_address(&mut self) -> Result { - self.has_n_remaining(16)?; - let buf = self.get_u128(); - Ok(Ipv6Addr::from(buf)) + self.require_n_remaining(16, "IPv6 Address")?; + let mut buffer = [0; 16]; + self.read_exact(&mut buffer)?; + Ok(Ipv6Addr::from(buffer)) } fn read_ipv4_prefix(&mut self) -> Result { + self.require_n_remaining(5, "IPv4 Prefix")?; let addr = self.read_ipv4_address()?; let mask = self.read_u8()?; - match Ipv4Net::new(addr, mask) { - Ok(n) => Ok(n), - Err(_) => Err(io::Error::new(io::ErrorKind::Other, "Invalid prefix mask").into()), - } + Ipv4Net::new(addr, 
mask).map_err(ParserError::from) } fn read_ipv6_prefix(&mut self) -> Result { + self.require_n_remaining(17, "IPv6 Prefix")?; let addr = self.read_ipv6_address()?; let mask = self.read_u8()?; - match Ipv6Net::new(addr, mask) { - Ok(n) => Ok(n), - Err(_) => Err(io::Error::new(io::ErrorKind::Other, "Invalid prefix mask").into()), - } + Ipv6Net::new(addr, mask).map_err(ParserError::from) } #[inline] @@ -115,35 +176,66 @@ pub trait ReadUtils: Buf { } } - fn read_asns(&mut self, as_length: &AsnLength, count: usize) -> Result, ParserError> { - let mut path = Vec::with_capacity(count); - - match as_length { - AsnLength::Bits16 => { - self.has_n_remaining(count * 2)?; // 2 bytes for 16-bit ASN - for _ in 0..count { - path.push(Asn::new_16bit(self.read_u16()?)); - } - } - AsnLength::Bits32 => { - self.has_n_remaining(count * 4)?; // 4 bytes for 32-bit ASN - for _ in 0..count { - path.push(Asn::new_32bit(self.read_u32()?)); - } - } - } - - Ok(path) - } - + #[inline(always)] fn read_afi(&mut self) -> Result { Afi::try_from(self.read_u16()?).map_err(ParserError::from) } + #[inline(always)] fn read_safi(&mut self) -> Result { Safi::try_from(self.read_u8()?).map_err(ParserError::from) } + /// An alternative to [ReadUtils::read_nlri_prefix] which is easier for the compiler to + /// optimize. Calling `x.read_v4_nlri_prefix()` is functionally equivalent to + /// `x.read_nlri_prefix(&Afi::Ipv4, false)`. 
+ #[inline(always)] + fn read_v4_nlri_prefix(&mut self) -> Result { + // Length in bits and bytes + let bit_len = self.read_u8()?; + + if bit_len > 32 { + return Err(ParserError::InvalidPrefixLength(PrefixLenError)); + } + + let byte_len: usize = (bit_len as usize + 7) / 8; + + let mut buff = [0; 4]; + self.read_exact(&mut buff[..byte_len])?; + + let prefix = match Ipv4Net::new(Ipv4Addr::from(buff), bit_len) { + Ok(v) => IpNet::V4(v), + Err(_) => unreachable!("Bit length has already been checked"), + }; + + Ok(NetworkPrefix { prefix, path_id: 0 }) + } + + /// An alternative to [ReadUtils::read_nlri_prefix] which is easier for the compiler to + /// optimize. Calling `x.read_v6_nlri_prefix()` is functionally equivalent to + /// `x.read_nlri_prefix(&Afi::Ipv6, false)`. + #[inline(always)] + fn read_v6_nlri_prefix(&mut self) -> Result { + // Length in bits and bytes + let bit_len = self.read_u8()?; + + // 16 bytes + if bit_len > 128 { + return Err(ParserError::InvalidPrefixLength(PrefixLenError)); + } + let byte_len: usize = (bit_len as usize + 7) / 8; + + let mut buff = [0; 16]; + self.read_exact(&mut buff[..byte_len])?; + + let prefix = match Ipv6Net::new(Ipv6Addr::from(buff), bit_len) { + Ok(v) => IpNet::V6(v), + Err(_) => unreachable!("Bit length has already been checked"), + }; + + Ok(NetworkPrefix { prefix, path_id: 0 }) + } + /// Read announced/withdrawn prefix. /// /// The length in bits is 1 byte, and then based on the IP version it reads different number of bytes. @@ -156,50 +248,38 @@ pub trait ReadUtils: Buf { ) -> Result { let path_id = if add_path { self.read_u32()? } else { 0 }; - // Length in bits + // Length in bits and bytes let bit_len = self.read_u8()?; - - // Convert to bytes let byte_len: usize = (bit_len as usize + 7) / 8; - let addr: IpAddr = match afi { + + let prefix = match afi { Afi::Ipv4 => { - // 4 bytes -- u32 - if byte_len > 4 { - return Err(ParserError::ParseError(format!( - "Invalid byte length for IPv4 prefix. 
byte_len: {}, bit_len: {}", - byte_len, bit_len - ))); + // 4 bytes + if bit_len > 32 { + return Err(ParserError::InvalidPrefixLength(PrefixLenError)); } + let mut buff = [0; 4]; - self.has_n_remaining(byte_len)?; - for i in 0..byte_len { - buff[i] = self.get_u8(); + self.read_exact(&mut buff[..byte_len])?; + + match Ipv4Net::new(Ipv4Addr::from(buff), bit_len) { + Ok(v) => IpNet::V4(v), + Err(_) => unreachable!("Bit length has already been checked"), } - IpAddr::V4(Ipv4Addr::from(buff)) } Afi::Ipv6 => { // 16 bytes - if byte_len > 16 { - return Err(ParserError::ParseError(format!( - "Invalid byte length for IPv6 prefix. byte_len: {}, bit_len: {}", - byte_len, bit_len - ))); + if bit_len > 128 { + return Err(ParserError::InvalidPrefixLength(PrefixLenError)); } - self.has_n_remaining(byte_len)?; + let mut buff = [0; 16]; - for i in 0..byte_len { - buff[i] = self.get_u8(); + self.read_exact(&mut buff[..byte_len])?; + + match Ipv6Net::new(Ipv6Addr::from(buff), bit_len) { + Ok(v) => IpNet::V6(v), + Err(_) => unreachable!("Bit length has already been checked"), } - IpAddr::V6(Ipv6Addr::from(buff)) - } - }; - let prefix = match IpNet::new(addr, bit_len) { - Ok(p) => p, - Err(_) => { - return Err(ParserError::ParseError(format!( - "Invalid network prefix length: {}", - bit_len - ))) } }; @@ -207,8 +287,10 @@ pub trait ReadUtils: Buf { } fn read_n_bytes(&mut self, n_bytes: usize) -> Result, ParserError> { - self.has_n_remaining(n_bytes)?; - Ok(self.copy_to_bytes(n_bytes).into()) + self.require_n_remaining(n_bytes, "raw bytes")?; + let mut buffer = vec![0; n_bytes]; + self.read_exact(&mut buffer)?; + Ok(buffer) } fn read_n_bytes_to_string(&mut self, n_bytes: usize) -> Result { @@ -220,82 +302,64 @@ pub trait ReadUtils: Buf { } } -pub fn parse_nlri_list( - mut input: Bytes, +#[cold] +#[inline(never)] +fn parse_nlri_list_fallback( + mut input: &[u8], + afi: Afi, add_path: bool, - afi: &Afi, -) -> Result, ParserError> { - let mut is_add_path = add_path; - let mut prefixes = 
vec![]; - - let mut retry = false; - let mut guessed = false; - - let mut input_copy = None; - - while input.remaining() > 0 { - if !is_add_path && input[0] == 0 { - // it's likely that this is a add-path wrongfully wrapped in non-add-path msg - debug!("not add-path but with NLRI size to be 0, likely add-path msg in wrong msg type, treat as add-path now"); - // cloning the data bytes - is_add_path = true; - guessed = true; - input_copy = Some(input.clone()); - } - let prefix = match input.read_nlri_prefix(afi, is_add_path) { - Ok(p) => p, - Err(e) => { - if guessed { - retry = true; - break; - } else { - return Err(e); - } - } - }; - prefixes.push(prefix); +) -> Result { + let mut prefixes = PrefixList::with_capacity(input.len() / 4); + while !input.is_empty() { + prefixes.push((&mut input).read_nlri_prefix(&afi, add_path)?); } - if retry { - prefixes.clear(); - // try again without attempt to guess add-path - // if we reach here (retry==true), input_copy must be Some - let mut input_2 = input_copy.unwrap(); - while input_2.remaining() > 0 { - let prefix = input_2.read_nlri_prefix(afi, add_path)?; - prefixes.push(prefix); + Ok(prefixes) +} + +fn parse_nlri_list_v4(mut input: &[u8]) -> Result { + let retry_input = input; + let mut prefixes = PrefixList::with_capacity(input.len() / 3); + + while !input.is_empty() { + if input[0] == 0 { + return match parse_nlri_list_fallback(retry_input, Afi::Ipv4, true) { + Ok(v) => Ok(v), + Err(_) => parse_nlri_list_fallback(retry_input, Afi::Ipv4, false), + }; } + + prefixes.push((&mut input).read_v4_nlri_prefix()?); } Ok(prefixes) } -/// A CRC32 implementation that converts a string to a hex string. -/// -/// CRC32 is a checksum algorithm that is used to verify the integrity of data. It is short in -/// length and sufficient for generating unique file names based on remote URLs. 
-pub fn crc32(input: &str) -> String { - let input_bytes = input.as_bytes(); - let mut table = [0u32; 256]; - let polynomial = 0xedb88320u32; - - for i in 0..256 { - let mut crc = i as u32; - for _ in 0..8 { - if crc & 1 == 1 { - crc = (crc >> 1) ^ polynomial; - } else { - crc >>= 1; - } +fn parse_nlri_list_v6(mut input: &[u8]) -> Result { + let retry_input = input; + let mut prefixes = PrefixList::with_capacity(input.len() / 5); + + while !input.is_empty() { + if input[0] == 0 { + return match parse_nlri_list_fallback(retry_input, Afi::Ipv6, true) { + Ok(v) => Ok(v), + Err(_) => parse_nlri_list_fallback(retry_input, Afi::Ipv6, false), + }; } - table[i as usize] = crc; + + prefixes.push((&mut input).read_v6_nlri_prefix()?); } - let mut crc = !0u32; - for byte in input_bytes.iter() { - let index = ((crc ^ (*byte as u32)) & 0xff) as usize; - crc = (crc >> 8) ^ table[index]; + Ok(prefixes) +} + +pub fn parse_nlri_list(input: &[u8], add_path: bool, afi: Afi) -> Result { + if add_path { + return parse_nlri_list_fallback(input, afi, true); } - format!("{:08x}", !crc) + match afi { + Afi::Ipv4 => parse_nlri_list_v4(input), + Afi::Ipv6 => parse_nlri_list_v6(input), + } } diff --git a/tests/record_parse.rs b/tests/record_parse.rs new file mode 100644 index 0000000..a2de199 --- /dev/null +++ b/tests/record_parse.rs @@ -0,0 +1,21 @@ +//! This integration test simply checks that no errors are encountered when attempting to parse a +//! update dump and a RIB dump. +use bgpkit_parser::BgpkitParser; + +#[test] +fn parse_updates() { + check_file("https://spaces.bgpkit.org/parser/update-example.gz"); +} + +#[test] +fn parse_rib_dump() { + check_file("https://spaces.bgpkit.org/parser/rib-example-small.bz2"); +} + +fn check_file(url: &str) { + let parser = BgpkitParser::new(url).unwrap(); + + for record in parser.into_record_iter() { + assert!(record.is_ok(), "{}", record.unwrap_err()); + } +}