diff --git a/tools/recipes/trace-tools.recipe b/tools/recipes/trace-tools.recipe new file mode 100644 index 0000000000..e339ab78ed --- /dev/null +++ b/tools/recipes/trace-tools.recipe @@ -0,0 +1,35 @@ +#! /bin/bash + +ALPINE_VERSION=v3.18 +ALPINE_BUILD_DEPENDENCIES=(bash git curl musl-dev gcc) +# We technically could cross compile, but would require to auto detect a cross linker, by setting e.g.: +# export CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER=aarch64-linux-gnu-ld +CROSS_COMPILATION_BROKEN=1 + +run_rustup() { + export CARGO_HOME="$(readlink -f .)/.cargo" + export RUSTUP_HOME="$(readlink -f .)/.rustup" + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain none -y --no-modify-path + source $CARGO_HOME/env + rustup toolchain install stable --allow-downgrade --profile minimal +} + +build_tracedump() { + cd trace-tools && + time RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target=x86_64-unknown-linux-musl +} + +download() { + cp -a "$LISA_HOME/tools/trace-tools" . 
+ cp "$LISA_HOME/LICENSE.txt" trace-tools/ +} + +build() { + run_rustup && (build_tracedump) +} + +install() { + source "$LISA_HOME/tools/recipes/utils.sh" + cp -v trace-tools/trace-dump/target/release/trace-dump "$LISA_ARCH_ASSETS/" + install_readme trace-dump trace-tools/ LICENSE.txt +} diff --git a/tools/trace-parser/rustfmt.toml b/tools/trace-parser/rustfmt.toml new file mode 100644 index 0000000000..7adb29aff6 --- /dev/null +++ b/tools/trace-parser/rustfmt.toml @@ -0,0 +1,3 @@ +imports_granularity="Crate" +group_imports="StdExternalCrate" +skip_macro_invocations=["make_closure_coerce", "make_closure_coerce_type", "closure"] diff --git a/tools/trace-parser/trace-tools/.gitignore b/tools/trace-parser/trace-tools/.gitignore new file mode 100644 index 0000000000..3765cea38e --- /dev/null +++ b/tools/trace-parser/trace-tools/.gitignore @@ -0,0 +1,4 @@ +debug/ +target/ +Cargo.lock +**/*.rs.bk diff --git a/tools/trace-parser/trace-tools/Cargo.toml b/tools/trace-parser/trace-tools/Cargo.toml new file mode 100644 index 0000000000..cd81f0dd55 --- /dev/null +++ b/tools/trace-parser/trace-tools/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "trace-tools" +version = "0.1.0" +edition = "2021" + +[lib] +name = "lib" +path = "src/lib/lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +traceevent = { path = "../traceevent" } + +thiserror = "1.0" +smartstring = {version = "1.0", features = ["serde"]} +arrow2 = { version = "0.18.0", features = ["io_parquet", "io_parquet_compression"] } +crossbeam = "0.8" +serde_json = "1.0" + +nom = "7.1" +bytemuck = "1.13" +clap = { version = "4.4", features = ["derive"] } + +[target.'cfg(target_arch = "x86_64")'.dependencies] +mimalloc = {version = "0.1", default-features = false } + +[profile.release] +debug = true + +# Static build: +# rustup target add x86_64-unknown-linux-musl +# RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target 
x86_64-unknown-linux-musl diff --git a/tools/trace-parser/trace-tools/fuzz/.gitignore b/tools/trace-parser/trace-tools/fuzz/.gitignore new file mode 100644 index 0000000000..1a45eee776 --- /dev/null +++ b/tools/trace-parser/trace-tools/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/tools/trace-parser/trace-tools/fuzz/Cargo.toml b/tools/trace-parser/trace-tools/fuzz/Cargo.toml new file mode 100644 index 0000000000..7cd0b5b0f4 --- /dev/null +++ b/tools/trace-parser/trace-tools/fuzz/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "trace-tools-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[target.'cfg(target_arch = "x86_64")'.dependencies] +mimalloc = {version = "0.1", default-features = false } + +[dependencies.trace-tools] +path = ".." + +[dependencies.traceevent] +path = "../../traceevent/" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[profile.release] +debug = 1 + +[[bin]] +name = "print" +path = "fuzz_targets/print.rs" +test = false +doc = false diff --git a/tools/trace-parser/trace-tools/fuzz/fuzz_targets/print.rs b/tools/trace-parser/trace-tools/fuzz/fuzz_targets/print.rs new file mode 100644 index 0000000000..f54bd89e94 --- /dev/null +++ b/tools/trace-parser/trace-tools/fuzz/fuzz_targets/print.rs @@ -0,0 +1,27 @@ +#![no_main] +#[cfg(target_arch = "x86_64")] +use mimalloc::MiMalloc; +use traceevent::header; +#[global_allocator] +#[cfg(target_arch = "x86_64")] +static GLOBAL: MiMalloc = MiMalloc; + +use std::io::Write; + +use lib::{check_header, parquet::dump_events, print::print_events}; +use libfuzzer_sys::fuzz_target; +use traceevent; + +fuzz_target!(|data: &[u8]| { + // Speedup the test by not writing anything anywhere + let mut out = std::io::sink(); + + let mut run = move || { + let mut reader: traceevent::io::BorrowingCursor<_> = data.into(); + let header = header::header(&mut reader)?; 
+ let res = print_events(&header, reader, &mut out); + res + }; + + let _ = run(); +}); diff --git a/tools/trace-parser/trace-tools/src/bin/trace-dump.rs b/tools/trace-parser/trace-tools/src/bin/trace-dump.rs new file mode 100644 index 0000000000..a1cea02645 --- /dev/null +++ b/tools/trace-parser/trace-tools/src/bin/trace-dump.rs @@ -0,0 +1,120 @@ +use std::{error::Error, fs::File, io::Write, path::PathBuf, process::ExitCode}; + +#[cfg(target_arch = "x86_64")] +use mimalloc::MiMalloc; +use traceevent::{header, header::Timestamp}; +#[global_allocator] +#[cfg(target_arch = "x86_64")] +static GLOBAL: MiMalloc = MiMalloc; + +use clap::{Parser, Subcommand}; +use lib::{ + check_header, + parquet::{dump_events, dump_header_metadata}, + print::print_events, +}; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Cli { + #[arg(long, value_name = "TRACE")] + trace: PathBuf, + + #[arg(long, value_name = "ERRORS_JSON")] + errors_json: Option, + + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand)] +enum Command { + HumanReadable, + Parquet { + #[arg(long, value_name = "EVENTS")] + events: Option>, + #[arg(long)] + unique_timestamps: bool, + }, + CheckHeader, + Metadata, +} + +fn _main() -> Result<(), Box> { + let cli = Cli::parse(); + + let path = cli.trace; + let file = std::fs::File::open(path)?; + let mut reader = unsafe { traceevent::io::MmapFile::new(file) }?; + let header = header::header(&mut reader)?; + + // We make the timestamp unique assuming it will be manipulated as an f64 number of seconds by + // consumers + let conv_ts = |ts| (ts as f64) / 1e9; + let make_unique_timestamps = { + let mut prev = (0, conv_ts(0)); + move |mut ts: Timestamp| { + ts = std::cmp::max(prev.0, ts); + let mut _ts = conv_ts(ts); + while prev.1 == _ts { + ts += 1; + _ts = conv_ts(ts); + } + prev = (ts, _ts); + ts + } + }; + + let stdout = std::io::stdout().lock(); + let mut out = std::io::BufWriter::with_capacity(1024 * 1024, stdout); + + 
let res = match cli.command { + Command::HumanReadable => print_events(&header, reader, &mut out), + Command::Parquet { + events, + unique_timestamps, + } => { + let make_ts: Box _> = if unique_timestamps { + Box::new(make_unique_timestamps) + } else { + Box::new(|ts| ts) + }; + + dump_events(&header, reader, make_ts, events) + } + Command::CheckHeader => check_header(&header, &mut out), + Command::Metadata => dump_header_metadata(&header, &mut out), + }; + out.flush()?; + + if let Err(err) = &res { + eprintln!("Errors happened while processing the trace:{err}"); + } + + if let Some(path) = &cli.errors_json { + let errors = match &res { + Err(err) => err + .errors() + .into_iter() + .map(|err| err.to_string()) + .collect(), + Ok(_) => Vec::new(), + }; + let mut file = File::create(&path)?; + let json_value = serde_json::json!({ + "errors": errors, + }); + file.write_all(json_value.to_string().as_bytes())?; + } + match res { + Ok(_) => Ok(()), + Err(_) => Err("Errors happened".into()), + } +} + +fn main() -> ExitCode { + match _main() { + Err(_) => ExitCode::from(1), + Ok(_) => ExitCode::from(0), + } +} diff --git a/tools/trace-parser/trace-tools/src/lib/error.rs b/tools/trace-parser/trace-tools/src/lib/error.rs new file mode 100644 index 0000000000..6f7258b2fd --- /dev/null +++ b/tools/trace-parser/trace-tools/src/lib/error.rs @@ -0,0 +1,57 @@ +use std::{error::Error, fmt}; + +#[derive(Debug)] +pub struct MultiError { + errors: Vec, +} + +impl MultiError { + fn new>(errors: I) -> Self { + MultiError { + errors: errors.into_iter().collect(), + } + } + + pub fn errors(&self) -> impl IntoIterator { + &self.errors + } +} + +impl fmt::Display for MultiError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + for err in &self.errors { + err.fmt(f)?; + write!(f, "\n")?; + } + Ok(()) + } +} + +#[derive(Debug)] +pub struct DynMultiError(MultiError>); + +impl DynMultiError { + pub fn new>(errors: I) -> Self { + DynMultiError(MultiError::new( + errors 
+ .into_iter() + .map(|err| Box::new(err) as Box), + )) + } + pub fn errors(&self) -> impl IntoIterator { + self.0.errors().into_iter().map(AsRef::as_ref) + } +} + +impl fmt::Display for DynMultiError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + self.0.fmt(f) + } +} + +impl From for DynMultiError { + #[inline] + fn from(error: E) -> Self { + error.into() + } +} diff --git a/tools/trace-parser/trace-tools/src/lib/lib.rs b/tools/trace-parser/trace-tools/src/lib/lib.rs new file mode 100644 index 0000000000..bc20caf283 --- /dev/null +++ b/tools/trace-parser/trace-tools/src/lib/lib.rs @@ -0,0 +1,42 @@ +pub mod error; +pub mod parquet; +pub mod print; + +use std::io::Write; + +use traceevent::header::Header; + +use crate::error::DynMultiError; + +pub fn check_header(header: &Header, mut out: W) -> Result<(), DynMultiError> { + for desc in header.event_descs() { + writeln!(&mut out, "Checking event \"{}\" format", desc.name)?; + + let raw_fmt = std::str::from_utf8(desc.raw_fmt()?)?; + match desc.event_fmt() { + Err(err) => { + writeln!( + &mut out, + " Error while parsing event format: {err}:\n{raw_fmt}" + ) + } + Ok(fmt) => { + match fmt.print_args() { + Ok(print_args) => { + print_args.iter().enumerate().try_for_each(|(i, res)| match res { + Err(err) => { + writeln!(&mut out, " Error while compiling printk argument #{i}: {err}:\n{raw_fmt}") + } + Ok(_) => Ok(()), + })?; + Ok(()) + } + Err(err) => { + writeln!(&mut out, " Error while parsing event print format arguments: {err}:\n{raw_fmt}") + } + } + } + }?; + } + Ok(()) +} diff --git a/tools/trace-parser/trace-tools/src/lib/parquet.rs b/tools/trace-parser/trace-tools/src/lib/parquet.rs new file mode 100644 index 0000000000..58b36aca69 --- /dev/null +++ b/tools/trace-parser/trace-tools/src/lib/parquet.rs @@ -0,0 +1,1578 @@ +use core::{ + cell::{Cell, RefCell, RefMut}, + convert::identity, + ops::{Deref, DerefMut}, +}; +use std::{ + collections::{btree_map::Entry, BTreeMap}, + fs::File, + 
io::Write, + path::PathBuf, + rc::Rc, + sync::Arc, +}; + +use arrow2::{ + array::{ + Array, MutableArray, MutableBinaryArray, MutableBooleanArray, MutableListArray, + MutablePrimitiveArray, MutableUtf8Array, TryPush as _, + }, + chunk::Chunk, + datatypes::{DataType, Field, Schema}, + error::Error as ArrowError, + io::parquet::write::{ + CompressionOptions, Encoding, FileWriter, RowGroupIterator, Version, WriteOptions, + }, +}; +use crossbeam::{ + channel::{bounded, Sender}, + thread::{scope, Scope, ScopedJoinHandle}, +}; +use nom::{Finish as _, Parser as _}; +use traceevent::{ + self, + buffer::{BufferError, EventVisitor}, + cinterp::{EvalEnv, EvalError, Value}, + cparser::{identifier, ArrayKind, Type}, + header::{ + Address, EventDesc, EventId, FieldFmt, Header, HeaderError, LongSize, Options, Timestamp, + CPU, + }, + io::BorrowingRead, + print::{PrintArg, PrintAtom, PrintFmtError, PrintFmtStr, VBinSpecifier}, +}; + +use crate::error::DynMultiError; + +// This size is a sweet spot. If in doubt, it's best to have chunks that are too big than too +// small, as smaller chunks can wreak performances and might also mean more work when consuming the +// file. In my experiments, 16 * 1024 was a transition point between good and horrible performance. +// Note that this chunk size is expressed in terms of number of rows, independently from the size +// of the rows themselves. 
+const CHUNK_SIZE: usize = 64 * 1024; + +type ArrayChunk = Chunk>; + +#[allow(clippy::enum_variant_names)] +#[derive(thiserror::Error, Debug)] +#[non_exhaustive] +enum MainError { + #[error("Error while loading data: {0}")] + IoError(#[from] std::io::Error), + + #[error("Error while parsing header: {0}")] + HeaderError(#[from] HeaderError), + + #[error("Error while parsing buffer: {0}")] + BufferError(#[from] BufferError), + + #[error("Error while interpreting event data: {0}")] + EvalError(#[from] EvalError), + + #[error("Error while parsing print format string: {0}")] + PrintFmtError(#[from] PrintFmtError), + + #[error("Arrow error: {0}")] + ArrowError(#[from] ArrowError), + + #[error("Type not handled: {0:?}")] + TypeNotHandled(Box), + + #[error("Arrow data type not handled: {0:?}")] + ArrowDataTypeNotHandled(Box), + + #[error( + "Runtime data cannot be used according to the column storage schema: {}", + match .0 { + Some(x) => x.to_string(), + None => "".to_string(), + } + )] + DataMismatchingSchema(Option>>), + + #[error("Missing field")] + MissingField, + + #[error("This print format string does not describe a meta event")] + NotAMetaEvent, + + #[error("Error while processing {} field {}: {}", .0.field_name, .0.event_name.as_deref().unwrap_or(""), .0.error)] + FieldError(Box) +} + +#[derive(Debug)] +struct FieldError { + event_name: Option, + field_name: String, + error: MainError, +} + +impl MainError { + fn with_field(self, event_name: Option<&str>, field_name: &str) -> Self { + MainError::FieldError(Box::new(FieldError { + event_name: event_name.map(Into::into), + field_name: field_name.into(), + error: self, + })) + } +} + +#[derive(Clone, Debug)] +enum EventCtx { + Selected(T), + NotSelected, +} + +#[derive(Clone, Debug)] +struct SharedState<'scope, 'scopeenv>(Rc, MainError>>>); + +impl<'scope, 'scopeenv> SharedState<'scope, 'scopeenv> { + fn new(x: Result, MainError>) -> Self { + SharedState(Rc::new(RefCell::new(x))) + } + + #[inline] + fn 
borrow_mut<'a>( + &'a self, + ) -> impl DerefMut, MainError>> + 'a { + RefCell::borrow_mut(&self.0) + } + + #[inline] + fn into_inner(self) -> Result, MainError>, Self> { + match Rc::try_unwrap(self.0) { + Ok(refcell) => Ok(refcell.into_inner()), + Err(inner) => Err(SharedState(inner)), + } + } +} + +pub fn dump_events( + header: &Header, + reader: R, + mut modify_timestamps: FTimestamp, + only_events: Option>, +) -> Result<(), DynMultiError> +where + FTimestamp: FnMut(Timestamp) -> Timestamp, + R: BorrowingRead + Send, +{ + let only_events = &only_events; + let options = WriteOptions { + write_statistics: true, + compression: CompressionOptions::Zstd(None), + version: Version::V2, + data_pagesize_limit: None, + }; + + // TODO: EventId might not be enough if we extend the API to deal with buffers from multiple + // traces + // + // Keep the per-event state in a map that is shared between all buffers. Otherwise, we would + // end up with a state per-event and per-buffer, which is not what we want. + type StateMap<'scope, 'scopeenv> = BTreeMap>>; + + macro_rules! chunk_append { + ($scrutinee:expr, $($arms:expr),*) => { + loop { + $( + $arms; + )* + + default_error!() + } + } + } + + macro_rules! make_macros { + ($scrutinee:expr) => { + let scrutinee = $scrutinee; + + macro_rules! default_error { + () => { + break Err(MainError::DataMismatchingSchema( + scrutinee.1.into_static().map(Box::new).ok(), + )) + }; + } + + macro_rules! basic { + ($pat:pat => $expr:expr) => { + if let $pat = scrutinee { + let xs = $expr; + break Ok(xs.len()); + } + }; + } + + macro_rules! 
integer { + ($arr_ctor:path, $val_ctor:path, $f:expr) => { + if let ($arr_ctor(xs), x) = scrutinee { + match x { + $val_ctor(x) => { + #[allow(clippy::redundant_closure_call)] + xs.push(Some($f(x))); + break Ok(xs.len()); + } + x => { + break Err(MainError::DataMismatchingSchema( + x.into_static().map(Box::new).ok(), + )) + } + } + } + }; + } + }; + } + + scope(move |scope| -> Result<_, _> { + let mut count: u64 = 0; + let buffers = header.buffers(reader)?; + let mut state_map = StateMap::new(); + let mut time_range = (None, None); + + let mut make_ctx = |header: &_, event_desc: &EventDesc| { + let id = event_desc.id; + match state_map.entry(id) { + Entry::Vacant(entry) => { + let select = match only_events { + None => true, + Some(only_events) => only_events.iter().any(|selected| { + event_desc.name.deref() == selected.deref() + }) + }; + if select { + let state = { + ReadState::new(header, event_desc, options, &event_desc.name, scope) + }; + + let state = EventCtx::Selected(SharedState::new(state)); + entry.insert(state.clone()); + state + } else { + EventCtx::NotSelected + } + } + Entry::Occupied(entry) => entry.get().clone(), + } + }; + + let events = traceevent::buffer::flyrecord( + buffers, + { + let time_range = &mut time_range; + let count = &mut count; + let mut slice_scratch = Vec::new(); + move |res: Result>, _>| -> Result<(), MainError> { + match res { + Ok(visitor) => { + *count += 1; + // This needs to happen regardless of whether the event is selected, + // otherwise the resulting timestamps would vary based on the set of + // events selected, making caching of the parquet files problematic. + let ts = modify_timestamps(visitor.timestamp); + *time_range = match time_range { + (None, _) => (Some(ts), None), + (first@Some(_), _) => (*first, Some(ts)), + }; + + match visitor.event_ctx()? 
{ + EventCtx::Selected(state) => { + let mut state = state.borrow_mut(); + let state = state.deref_mut(); + match state { + // We ignore the error here as it will be reported at the end when finalizing + // the files. + Err(_) => Ok(()), + Ok(state) => { + let buf_id = visitor.buffer_id; + let event_desc = visitor.event_desc()?; + + let cpu = buf_id.cpu; + let len: Cell = Cell::new(0); + + let mut table_state = state.process_fields( + &visitor, + |field_name, col, val| -> Result<(), MainError> { + let val = val?; + + macro_rules! cast { + ($from:ty, $to:ty) => { + |x: $from| -> $to { + let x: $to = match x.try_into() { + Ok(x) => x, + Err(_) => { + let conv = + concat!("from ", stringify!($from), " to ", stringify!($to)); + panic!( + "Cannot convert {}.{}={x} at t={ts} on CPU {cpu} {conv}", + event_desc.name, field_name, + ) + } + }; + x + } + }; + } + + make_macros!((col, val)); + len.replace(chunk_append! { + // Integers + integer!(FieldArray::I8, Value::I64Scalar, cast!(i64, i8)), + integer!(FieldArray::I16, Value::I64Scalar, cast!(i64, i16)), + integer!(FieldArray::I32, Value::I64Scalar, cast!(i64, i32)), + integer!(FieldArray::I64, Value::I64Scalar, identity), + + integer!(FieldArray::U8, Value::U64Scalar, cast!(u64, u8)), + integer!(FieldArray::U16, Value::U64Scalar, cast!(u64, u16)), + integer!(FieldArray::U32, Value::U64Scalar, cast!(u64, u32)), + integer!(FieldArray::U64, Value::U64Scalar, identity), + + basic!((FieldArray::Str(xs), x) => { + xs.push(x.deref_ptr(&visitor.buffer_env())?.to_str()); + xs + }), + + basic!((FieldArray::Bool(xs), Value::I64Scalar(x)) => { + xs.push(Some(x != 0)); + xs + }), + basic!((FieldArray::Bool(xs), Value::U64Scalar(x)) => { + xs.push(Some(x != 0)); + xs + }), + + // Binary + basic!((FieldArray::Binary(xs), Value::U8Array(x)) => { + xs.push(Some(x)); + xs + }), + basic!((FieldArray::Binary(xs), Value::I8Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + basic!((FieldArray::Binary(xs), 
Value::U16Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + basic!((FieldArray::Binary(xs), Value::I16Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + basic!((FieldArray::Binary(xs), Value::U32Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + basic!((FieldArray::Binary(xs), Value::I32Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + basic!((FieldArray::Binary(xs), Value::U64Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + basic!((FieldArray::Binary(xs), Value::I64Array(x)) => { + xs.push(Some(bytemuck::cast_slice(&x))); + xs + }), + + + // Lists + basic!((FieldArray::ListBool(xs), Value::U8Array(x)) => { + xs.try_push(Some(x.iter().map(|x| Some(*x != 0))))?; + xs + }), + basic!((FieldArray::ListBool(xs), Value::I8Array(x)) => { + xs.try_push(Some(x.iter().map(|x| Some(*x != 0))))?; + xs + }), + basic!((FieldArray::ListU8(xs), Value::U8Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + basic!((FieldArray::ListI8(xs), Value::I8Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + + basic!((FieldArray::ListU16(xs), Value::U16Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + basic!((FieldArray::ListI16(xs), Value::I16Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + + basic!((FieldArray::ListU32(xs), Value::U32Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + basic!((FieldArray::ListI32(xs), Value::I32Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + + basic!((FieldArray::ListU64(xs), Value::U64Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + basic!((FieldArray::ListI64(xs), Value::I64Array(x)) => { + xs.try_push(Some(x.iter().copied().map(Some)))?; + xs + }), + + // Bitmap + basic!((FieldArray::ListU8(xs), Value::Bitmap(x)) => { + 
xs.try_push(Some(x.into_iter().as_bytes().map(Some)))?; + xs + }), + basic!((FieldArray::Binary(xs), Value::Bitmap(x)) => { + slice_scratch.clear(); + slice_scratch.extend(x.into_iter().as_bytes()); + xs.try_push(Some(&slice_scratch))?; + xs + }), + basic!((FieldArray::ListBool(xs), Value::Bitmap(x)) => { + xs.try_push(Some(x.into_iter().as_bits().map(Some)))?; + xs + }) + + }?); + Ok(()) + }, + only_events, + )?; + + table_state.fixed_cols.time.push(Some(ts)); + table_state.fixed_cols.cpu.push(Some(cpu)); + + if len.get() >= CHUNK_SIZE { + let chunk = table_state.extract_chunk()?; + table_state.sender.send(chunk).unwrap(); + } + Ok(()) + } + } + } + _ => Ok(()) + } + } + Err(err) => Err(err.into()), + } + } + }, + &mut make_ctx, + )?; + + let mut errors = Vec::new(); + let mut push_global_err = |err: Result<(), MainError>| match err { + Err(err) => { + errors.push(err); + } + _ => {} + }; + + // Even if there were some errors, we should have pushed some None values so that all + // columns are of the same length, so the file can be finalized. + events.into_iter().map(&mut push_global_err).for_each(drop); + eprintln!("Found {count} event records in this trace"); + + // Ensure we have a file for each event that was asked for as long as that event is + // actually available in the trace header. + if let Some(only_events) = only_events { + for event in only_events { + header.event_desc_by_name(event).map(|event_desc| make_ctx(header, event_desc)); + } + } + + let mut handles = Vec::new(); + let mut events_info = Vec::new(); + + while let Some((id, ctx)) = state_map.pop_first() { + match ctx { + EventCtx::Selected(read_state) => { + // There shouldn't be any other clone of the Rc<> at this point so we can + // safely unwrap. 
+ push_global_err(match read_state.into_inner().unwrap() { + Ok(read_state) => { + for mut read_state in read_state.drain_states() { + let res = match read_state.extract_chunk() { + Ok(chunk) => { + read_state.sender.send(chunk).unwrap(); + // Drop the sender which will close the channel so that the writer thread will + // know it's time to finish. + drop(read_state.sender); + handles.push(read_state.handle); + eprintln!("File written successfully {}", read_state.name); + Ok(()) + } + Err(err) => Err(err), + }; + let path = match res { + Ok(_) => read_state.path.to_str().expect("Unable to convert PathBuf to String").into(), + Err(_) => serde_json::Value::Null, + }; + read_state.errors.extend_errors([res]); + + let errors = read_state.errors.errors; + if !errors.is_empty() { + eprintln!("Errors encountered while dumping event {}, see meta.json for details", read_state.name); + } + + events_info.push(serde_json::json!({ + "event": read_state.name, + "path": path, + "format": "parquet", + "errors": errors.into_iter().map(|err| err.to_string()).collect::>(), + })); + } + Ok(()) + } + Err(err) => { + match header.event_desc_by_id(id) { + Some(desc) => { + events_info.push(serde_json::json!({ + "event": desc.name, + "path": None::<&str>, + "format": "parquet", + "errors": [err.to_string()], + })); + Ok(()) + }, + // If we cannot get the associated event name, we just turn it into + // a global error. 
+ _ => Err(err) + } + } + }); + } + _ => {}, + } + } + + for handle in handles { + handle.join().expect("Writer thread panicked")?; + } + + let time_range = match time_range { + (Some(start), Some(end)) => (start, end), + (Some(start), None) => (start, start), + (None, None) => (0, 0), + (None, Some(end)) => panic!("Time time_range has an end ({end}) but not a start"), + }; + + push_global_err((|| { + dump_metadata( + File::create("meta.json")?, + header, + Some(events_info), + Some(time_range), + ) + })()); + + if errors.is_empty() { + Ok(()) + } else { + Err(DynMultiError::new(errors)) + } + }).unwrap() +} + +fn dump_metadata( + mut writer: W, + header: &Header, + events_info: Option>, + time_range: Option<(Timestamp, Timestamp)>, +) -> Result<(), MainError> { + let mut json_value = serde_json::json!({ + "pid-comms": header.pid_comms().into_iter().collect::>(), + "cpus-count": header.nr_cpus(), + "symbols-address": header.kallsyms().into_iter().collect::>(), + "available-events": header.event_descs().into_iter().map(|desc| desc.name.deref()).collect::>(), + }); + + if let Some(events_info) = events_info { + json_value["events-info"] = events_info.into(); + } + + if let Some(time_range) = time_range { + json_value["time-range"] = vec![time_range.0, time_range.1].into(); + } + + let trace_id = header.options().into_iter().find_map(|opt| match opt { + Options::TraceId(id) => Some(*id), + _ => None, + }); + if let Some(id) = trace_id { + json_value["trace-id"] = id.into(); + } + + Ok(writer.write_all(json_value.to_string().as_bytes())?) +} + +pub fn dump_header_metadata(header: &Header, writer: W) -> Result<(), DynMultiError> { + Ok(dump_metadata(writer, header, None, None)?) 
+} + +#[derive(Debug)] +enum FieldArray { + U8(MutablePrimitiveArray), + U16(MutablePrimitiveArray), + U32(MutablePrimitiveArray), + U64(MutablePrimitiveArray), + + I8(MutablePrimitiveArray), + I16(MutablePrimitiveArray), + I32(MutablePrimitiveArray), + I64(MutablePrimitiveArray), + + Bool(MutableBooleanArray), + // Using i32 means strings and binary blobs have to be smaller than 2GB, which should be fine + Binary(MutableBinaryArray), + Str(MutableUtf8Array), + + ListBool(MutableListArray), + + ListU8(MutableListArray>), + ListU16(MutableListArray>), + ListU32(MutableListArray>), + ListU64(MutableListArray>), + + ListI8(MutableListArray>), + ListI16(MutableListArray>), + ListI32(MutableListArray>), + ListI64(MutableListArray>), +} + +impl FieldArray { + fn into_arc(self) -> Arc { + match self { + FieldArray::U8(xs) => xs.into_arc(), + FieldArray::U16(xs) => xs.into_arc(), + FieldArray::U32(xs) => xs.into_arc(), + FieldArray::U64(xs) => xs.into_arc(), + + FieldArray::I8(xs) => xs.into_arc(), + FieldArray::I16(xs) => xs.into_arc(), + FieldArray::I32(xs) => xs.into_arc(), + FieldArray::I64(xs) => xs.into_arc(), + + FieldArray::Bool(xs) => xs.into_arc(), + FieldArray::Str(xs) => xs.into_arc(), + FieldArray::Binary(xs) => xs.into_arc(), + + FieldArray::ListBool(xs) => xs.into_arc(), + + FieldArray::ListU8(xs) => xs.into_arc(), + FieldArray::ListU16(xs) => xs.into_arc(), + FieldArray::ListU32(xs) => xs.into_arc(), + FieldArray::ListU64(xs) => xs.into_arc(), + + FieldArray::ListI8(xs) => xs.into_arc(), + FieldArray::ListI16(xs) => xs.into_arc(), + FieldArray::ListI32(xs) => xs.into_arc(), + FieldArray::ListI64(xs) => xs.into_arc(), + } + } + + fn push_null(&mut self) { + match self { + FieldArray::U8(xs) => xs.push_null(), + FieldArray::U16(xs) => xs.push_null(), + FieldArray::U32(xs) => xs.push_null(), + FieldArray::U64(xs) => xs.push_null(), + + FieldArray::I8(xs) => xs.push_null(), + FieldArray::I16(xs) => xs.push_null(), + FieldArray::I32(xs) => xs.push_null(), + 
FieldArray::I64(xs) => xs.push_null(), + + FieldArray::Bool(xs) => xs.push_null(), + FieldArray::Str(xs) => xs.push_null(), + FieldArray::Binary(xs) => xs.push_null(), + + FieldArray::ListBool(xs) => xs.push_null(), + + FieldArray::ListU8(xs) => xs.push_null(), + FieldArray::ListU16(xs) => xs.push_null(), + FieldArray::ListU32(xs) => xs.push_null(), + FieldArray::ListU64(xs) => xs.push_null(), + + FieldArray::ListI8(xs) => xs.push_null(), + FieldArray::ListI16(xs) => xs.push_null(), + FieldArray::ListI32(xs) => xs.push_null(), + FieldArray::ListI64(xs) => xs.push_null(), + } + } +} + +#[derive(Debug)] +struct FixedCols { + time: MutablePrimitiveArray, + cpu: MutablePrimitiveArray, +} + +impl FixedCols { + fn new() -> Self { + FixedCols { + time: MutablePrimitiveArray::with_capacity(CHUNK_SIZE), + cpu: MutablePrimitiveArray::with_capacity(CHUNK_SIZE), + } + } + + fn arrow_fields() -> impl Iterator { + [ + Field::new("common_ts", DataType::UInt64, false), + Field::new("common_cpu", DataType::UInt32, false), + ] + .into_iter() + } + + fn into_arcs(self) -> impl Iterator> { + [self.time.into_arc(), self.cpu.into_arc()].into_iter() + } +} + +#[derive(Debug)] +struct ReadState<'scope, 'scopeenv> { + variant: ReadStateVariant<'scope>, + options: WriteOptions, + scope: &'scope Scope<'scopeenv>, +} + +type MetaEventEntry<'scope> = + Rc>, PrintFmtStr), MainError>>>; + +#[derive(Debug)] +enum ReadStateVariant<'scope> { + Generic(TableState<'scope>), + BPrint { + common_pid_fmt: FieldFmt, + fmt_fmt: FieldFmt, + buf_fmt: FieldFmt, + generic: TableState<'scope>, + // We have a table indexed by the address of the format string since that is what we get + // from the bprint event. + meta_events_by_addr: BTreeMap>, + // However, we don't want to accidentally create 2 identical meta events if 2 or more + // independent format strings have the exact same format (e.g. the user copy-pasted some + // calls to trace_printk() in various places). 
For this purpose, we also maintain a map + // indexed by the format string content, which is used to populate the by-address map. + meta_events_by_fmt: BTreeMap>, + }, +} + +impl<'scope, 'scopeenv> ReadState<'scope, 'scopeenv> +where + 'scopeenv: 'scope, +{ + fn new( + header: &Header, + event_desc: &EventDesc, + options: WriteOptions, + name: &str, + scope: &'scope Scope<'scopeenv>, + ) -> Result { + let (full_schema, fields_schema) = Self::make_event_desc_schemas(header, event_desc)?; + let state = TableState::new(full_schema, fields_schema, options, name, scope)?; + + let variant = match event_desc.name.deref() { + event_name @ "bprint" => { + let struct_fmt = &event_desc.event_fmt()?.struct_fmt()?; + + macro_rules! field_fmt { + ($struct_fmt:expr, $name:expr) => {{ + let field_name = $name; + $struct_fmt.field_by_name(field_name).ok_or_else(|| { + MainError::MissingField.with_field(Some(event_name), field_name) + }) + }}; + } + + let fmt_fmt = field_fmt!(struct_fmt, "fmt")?; + let buf_fmt = field_fmt!(struct_fmt, "buf")?; + let common_pid_fmt = field_fmt!(struct_fmt, "common_pid")?; + + ReadStateVariant::BPrint { + fmt_fmt: fmt_fmt.clone(), + buf_fmt: buf_fmt.clone(), + common_pid_fmt: common_pid_fmt.clone(), + generic: state, + meta_events_by_addr: BTreeMap::new(), + meta_events_by_fmt: BTreeMap::new(), + } + } + _ => ReadStateVariant::Generic(state), + }; + Ok(ReadState { + variant, + options, + scope, + }) + } + + fn process_fields<'ret, 'i, 'h, 'edm, InitDescF, Ctx, F>( + &'ret mut self, + visitor: &'ret EventVisitor<'i, 'h, 'edm, InitDescF, Ctx>, + mut f: F, + only_events: &Option>, + ) -> Result> + 'ret, MainError> + where + 'i: 'ret, + 'h: 'ret, + 'scope: 'ret, + InitDescF: 'h + FnMut(&'h Header, &'h EventDesc) -> Ctx, + F: FnMut(&str, &mut FieldArray, Result, BufferError>) -> Result<(), MainError>, + { + enum DerefMutWrapper<'a, T> { + RefMut(&'a mut T), + RcRefMut(RefMut<'a, T>), + } + + impl<'a, T> Deref for DerefMutWrapper<'a, T> { + type Target = T; 
+ fn deref(&self) -> &T { + match self { + DerefMutWrapper::RefMut(x) => x, + DerefMutWrapper::RcRefMut(x) => x.deref(), + } + } + } + + impl<'a, T> DerefMut for DerefMutWrapper<'a, T> { + fn deref_mut(&mut self) -> &mut T { + match self { + DerefMutWrapper::RefMut(x) => x, + DerefMutWrapper::RcRefMut(x) => x.deref_mut(), + } + } + } + + let mut handle_error = + |visitor: &EventVisitor<'i, 'h, 'edm, InitDescF, Ctx>, name, col: &mut _, val| { + let res = f(name, col, val); + match res { + Err(err) => { + col.push_null(); + Err(err.with_field(visitor.event_name().ok(), name)) + } + _ => Ok(()), + } + }; + + macro_rules! generic_iter { + ($table_state:expr, $visitor:expr) => {{ + let table_state = $table_state; + let visitor = $visitor; + + let field_cols = table_state.field_cols.iter_mut(); + // We want to go through all the columns so that we have a chance to append None + // values in places we had an error, and when we are done we return the last error. + // This way, all columns should have the same length and we will still be able to + // dump to parquet. + + table_state.errors.extend_errors( + visitor + .fields()? + .into_iter() + .zip(field_cols) + .map(|((fmt, val), col)| { + handle_error(visitor, fmt.declaration.identifier.deref(), col, val) + }), + ); + Ok(DerefMutWrapper::RefMut(table_state)) + }}; + } + + macro_rules! 
bprint_meta_iter { + ($meta_event_entry:expr, $visitor:expr, $buf_fmt:expr, $common_pid_fmt:expr) => {{ + let visitor = $visitor; + let buf_fmt = $buf_fmt; + let common_pid_fmt = $common_pid_fmt; + + let buf = visitor.field_by_fmt(buf_fmt)?; + + match buf { + Value::U32Array(array) => { + let (table_state, print_fmt) = $meta_event_entry; + let mut table_state: RefMut<'ret, _> = RefCell::borrow_mut(table_state); + let mut _table_state = table_state.deref_mut(); + + let pid = visitor.field_by_fmt(common_pid_fmt)?; + + _table_state.errors.extend_errors( + visitor + .vbin_fields(print_fmt, &array) + .into_iter() + .chain([Ok(PrintArg { + value: pid, + width: None, + precision: None, + })]) + .zip(_table_state.field_cols.iter_mut()) + .map(|(res, col)| { + handle_error( + visitor, + &_table_state.name, + col, + res.map(|print_arg| print_arg.value), + ) + }), + ); + + Ok(DerefMutWrapper::RcRefMut(table_state)) + } + val => Err(MainError::EvalError(EvalError::IllegalType(val.into_static().ok()))), + } + }}; + } + + match &mut self.variant { + ReadStateVariant::Generic(state) => generic_iter!(state, visitor), + ReadStateVariant::BPrint { + generic, + fmt_fmt, + buf_fmt, + common_pid_fmt, + meta_events_by_addr, + meta_events_by_fmt, + .. + } => { + let fmt = visitor.field_by_fmt(fmt_fmt)?; + let addr = match fmt { + Value::U64Scalar(addr) => Ok(addr), + Value::I64Scalar(addr) => Ok(addr as u64), + _ => Err(EvalError::CannotDeref(0)), + }?; + + macro_rules! handle { + ($res:expr) => {{ + match Rc::as_ref($res) { + EventCtx::Selected(Ok(entry)) => { + bprint_meta_iter!(entry, visitor, buf_fmt, common_pid_fmt) + } + _ => generic_iter!(generic, visitor), + } + }}; + } + match meta_events_by_addr.entry(addr) { + // We have a recorded attempt to treat it as a meta event that did not succeed, + // so we treat it like a regular bprint text event. 
+ Entry::Occupied(entry) => handle!(entry.into_mut()), + Entry::Vacant(entry) => { + let header = visitor.header; + let env = visitor.buffer_env(); + + let parse_print_fmt = || -> Result { + let print_fmt = env.deref_static(addr)?; + let print_fmt = match print_fmt.to_str() { + Some(s) => Ok(s), + None => Err(EvalError::IllegalType(print_fmt.into_static().ok())), + }?; + Ok(traceevent::print::parse_print_fmt( + header, + print_fmt.as_bytes(), + )?) + }; + + let make_schema = + |print_fmt: PrintFmtStr| match Self::make_print_fmt_schemas( + header, &print_fmt, + ) { + Ok((meta_event_name, full_schema, fields_schema)) => { + let meta_event_name = format!("trace_printk@{meta_event_name}"); + + let select = match only_events { + None => true, + Some(only_events) => only_events.iter().any(|selected| { + meta_event_name.deref() == selected.deref() + }), + }; + + if select { + match TableState::new( + full_schema, + fields_schema, + self.options, + &meta_event_name, + self.scope, + ) { + Ok(state) => EventCtx::Selected(Ok(( + RefCell::new(state), + print_fmt, + ))), + Err(err) => EventCtx::Selected(Err(err)), + } + } else { + EventCtx::NotSelected + } + } + Err(_) => EventCtx::NotSelected, + }; + + let new = match parse_print_fmt() { + Ok(print_fmt) => { + // Find an already-created meta event that would have the same + // print format string, therefore the same schema. + match meta_events_by_fmt.entry(print_fmt) { + Entry::Occupied(entry) => Rc::clone(entry.get()), + Entry::Vacant(entry) => { + let new = Rc::new(make_schema(entry.key().clone())); + entry.insert(Rc::clone(&new)); + new + } + } + } + Err(_) => Rc::new(EventCtx::Selected(Err(MainError::NotAMetaEvent))), + }; + + handle!(entry.insert(new)) + } + } + } + } + } + + fn drain_states(self) -> impl Iterator> { + match self.variant { + ReadStateVariant::Generic(state) => { + Box::new([state].into_iter()) as Box> + } + ReadStateVariant::BPrint { + generic, + mut meta_events_by_fmt, + meta_events_by_addr, + .. 
+ } => { + // Ensure we kill all the Rc that could be pointing at the meta event entries + // before trying to unwrap the Rc + drop(meta_events_by_addr); + Box::new([generic].into_iter().chain( + std::iter::from_fn(move || meta_events_by_fmt.pop_first()).filter_map( + |(_, entry)| match Rc::into_inner(entry).unwrap() { + EventCtx::Selected(entry) => { + let (table_state, _) = entry.ok()?; + let table_state = RefCell::into_inner(table_state); + Some(table_state) + } + _ => None, + }, + ), + )) as Box> + } + } + } + + fn make_event_desc_schemas( + header: &Header, + event_desc: &EventDesc, + ) -> Result<(Schema, Schema), MainError> { + let struct_fmt = &event_desc.event_fmt()?.struct_fmt()?; + let fields = &struct_fmt.fields; + Self::make_schemas( + &event_desc.name, + header, + fields.iter().map(|fmt| { + ( + fmt.declaration.identifier.to_string(), + fmt.declaration.typ.clone(), + ) + }), + ) + } + + fn make_print_fmt_schemas( + header: &Header, + fmt: &PrintFmtStr, + ) -> Result<(String, Schema, Schema), MainError> { + let field_name_parser = || { + nom::sequence::preceded( + nom::multi::many0(nom::character::complete::char(' ')), + nom::sequence::terminated(identifier(), nom::character::complete::char('=')), + ) + }; + + let mut event_name = None; + let mut field_name = None; + + let fields = fmt.atoms.iter().enumerate().filter_map(|(i, atom)| { + if i == 0 { + match atom { + PrintAtom::Fixed(fixed) => { + let res = nom::combinator::all_consuming(nom::sequence::separated_pair( + identifier(), + nom::character::complete::char(':'), + field_name_parser(), + )) + .parse(fixed.as_bytes()) + .finish(); + match res { + Ok((_, (_event_name, _field_name))) => { + field_name = Some(_field_name); + event_name = Some(_event_name); + None + } + Err(()) => None, + } + } + _ => None, + } + } else { + match atom { + PrintAtom::Fixed(fixed) => { + let _ = nom::combinator::all_consuming(field_name_parser()) + .parse(fixed.as_bytes()) + .finish() + .map(|(_, name)| { + field_name = 
Some(name); + }); + None + } + PrintAtom::Variable { vbin_spec, .. } => { + let typ = match vbin_spec { + VBinSpecifier::U8 => Type::U8, + VBinSpecifier::I8 => Type::I8, + + VBinSpecifier::U16 => Type::U16, + VBinSpecifier::I16 => Type::I16, + + VBinSpecifier::U32 => Type::U32, + VBinSpecifier::I32 => Type::I32, + + VBinSpecifier::U64 => Type::U64, + VBinSpecifier::I64 => Type::I64, + + VBinSpecifier::Str => Type::Array( + Box::new(header.kernel_abi().char_typ()), + ArrayKind::ZeroLength, + ), + }; + Some(match &field_name { + None => Err(MainError::NotAMetaEvent), + Some(name) => Ok((name.deref().into(), typ)), + }) + } + } + } + }); + let fields: Result, MainError> = fields.collect(); + let mut fields = fields?; + fields.push(("common_pid".into(), Type::I32)); + let event_name = event_name.ok_or(MainError::NotAMetaEvent)?; + let (full_schema, fields_schema) = Self::make_schemas(&event_name, header, fields)?; + Ok((event_name.into(), full_schema, fields_schema)) + } + + fn make_schemas( + event_name: &str, + header: &Header, + fields: FieldsIterator, + ) -> Result<(Schema, Schema), MainError> + where + FieldsIterator: IntoIterator, + { + let char_typ = header.kernel_abi().char_typ(); + let long_size = header.kernel_abi().long_size; + + let field_cols = fields.into_iter().map(|(name, typ)| { + fn guess_typ(typ: &Type, char_typ: &Type, long_size: &LongSize) -> Result { + let recurse = |typ| guess_typ(typ, char_typ, long_size); + match typ { + Type::Bool => Ok(DataType::Boolean), + Type::U8 => Ok(DataType::UInt8), + Type::U16 => Ok(DataType::UInt16), + Type::U32 => Ok(DataType::UInt32), + Type::U64 => Ok(DataType::UInt64), + Type::I8 => Ok(DataType::Int8), + Type::I16 => Ok(DataType::Int16), + Type::I32 => Ok(DataType::Int32), + Type::I64 => Ok(DataType::Int64), + + // char [] are considered as strings + Type::Array(inner, _) | Type::Pointer(inner) if &**inner == char_typ => Ok(DataType::Utf8), + + // u8 [] are considered as byte buffer + Type::Array(inner, _) | 
Type::Pointer(inner) if matches!(&**inner, Type::Typedef(_, name) if name == "u8") => Ok(DataType::Binary), + + Type::Array(inner, _) | Type::Pointer(inner) if matches!( + inner.resolve_wrapper(), + Type::Bool | Type::U8 | Type::I8 | Type::U16 | Type::I16 | Type::U32 | Type::I32 | Type::U64 | Type::I64 + ) => Ok(DataType::List(Box::new(Field::new( + "", + recurse(inner)?, + true, + )))), + + Type::Pointer(..) => match long_size { + LongSize::Bits32 => Ok(DataType::UInt32), + LongSize::Bits64 => Ok(DataType::UInt64), + }, + + Type::Typedef(_, id) if id.deref() == "cpumask_t" => Ok(DataType::List(Box::new( + Field::new( + "cpumask_t", + DataType::Boolean, + false, + ) + ))), + + // TODO: try to do symbolic resolution of enums somehow, maybe with BTF + // Do we want that always ? What about conversion from other formats where the + // enum is not available ? Maybe that should be left to a Python function, + // hooked with the BTF parser, and BTF available in platform info. + Type::Typedef(typ, _) | Type::Enum(typ, _) | Type::DynamicScalar(typ, _) => recurse(typ), + + // TODO: handle DynamicScalar such as cpumasks, either as extension types or + // simply as DataType::Binary, decoded to be little endian and 8bit-word based + // instead of the weird chunked format of the kernel. + // Or maybe kernel bitmap can be turned into a bitmap type of parquet if it is + // supported. We need to check polars/pandas support for those as well. 
+ typ => Err(MainError::TypeNotHandled(Box::new(typ.clone()))), + } + } + let typ = guess_typ(&typ, &char_typ, &long_size).map_err(|err| err.with_field(Some(event_name), &name))?; + Ok(Field::new(name, typ, true)) + }); + let field_cols: Result, MainError> = field_cols.collect(); + let field_cols = field_cols?; + + let fields_schema = Schema::from(field_cols.clone()); + let full_schema = Schema::from( + FixedCols::arrow_fields() + .chain(field_cols) + .collect::>(), + ); + Ok((full_schema, fields_schema)) + } +} + +#[derive(Debug)] +struct TableState<'scope> { + name: String, + path: PathBuf, + fields_schema: Schema, + fixed_cols: FixedCols, + field_cols: Vec, + + sender: Sender, + handle: ScopedJoinHandle<'scope, Result<(), MainError>>, + errors: TableErrors, +} + +struct EventWriteState { + full_schema: Schema, + options: WriteOptions, + writer: FileWriter, + count: u64, +} + +impl<'scope> TableState<'scope> { + fn new( + full_schema: Schema, + fields_schema: Schema, + options: WriteOptions, + name: &str, + scope: &'scope Scope, + ) -> Result { + let (fixed_cols, field_cols) = Self::make_cols(name, &fields_schema)?; + + let path = PathBuf::from(format!("{}.parquet", name)); + let file = File::create(&path)?; + let writer = FileWriter::try_new(file, full_schema.clone(), options)?; + let (sender, receiver) = bounded(128); + + let mut write_state = EventWriteState { + full_schema, + options, + writer, + count: 0, + }; + let write_thread = move |_: &_| -> Result<_, MainError> { + for chunk in receiver.iter() { + write_state.dump_to_file(chunk)?; + } + write_state.writer.end(None)?; + Ok(()) + }; + + let handle = scope.spawn(write_thread); + + Ok(TableState { + field_cols, + fixed_cols, + fields_schema, + sender, + name: name.to_string(), + handle, + path, + errors: TableErrors::new(), + }) + } + + fn make_cols(name: &str, schema: &Schema) -> Result<(FixedCols, Vec), MainError> { + macro_rules! 
make_array { + ($variant:path) => { + Ok($variant(MutablePrimitiveArray::with_capacity(CHUNK_SIZE))) + }; + } + let make_col = |field: &Field| match &field.data_type { + DataType::Int8 => make_array!(FieldArray::I8), + DataType::Int16 => make_array!(FieldArray::I16), + DataType::Int32 => make_array!(FieldArray::I32), + DataType::Int64 => make_array!(FieldArray::I64), + + DataType::UInt8 => make_array!(FieldArray::U8), + DataType::UInt16 => make_array!(FieldArray::U16), + DataType::UInt32 => make_array!(FieldArray::U32), + DataType::UInt64 => make_array!(FieldArray::U64), + + DataType::Boolean => Ok(FieldArray::Bool(MutableBooleanArray::with_capacity( + CHUNK_SIZE, + ))), + DataType::Utf8 => Ok(FieldArray::Str(MutableUtf8Array::with_capacity(CHUNK_SIZE))), + DataType::Binary => Ok(FieldArray::Binary(MutableBinaryArray::with_capacity( + CHUNK_SIZE, + ))), + + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::Boolean, + .. + } + ) => + { + Ok(FieldArray::ListBool(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::UInt8, + .. + } + ) => + { + Ok(FieldArray::ListU8(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::UInt16, + .. + } + ) => + { + Ok(FieldArray::ListU16(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::UInt32, + .. + } + ) => + { + Ok(FieldArray::ListU32(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::UInt64, + .. + } + ) => + { + Ok(FieldArray::ListU64(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::Int8, + .. 
+ } + ) => + { + Ok(FieldArray::ListI8(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::Int16, + .. + } + ) => + { + Ok(FieldArray::ListI16(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::Int32, + .. + } + ) => + { + Ok(FieldArray::ListI32(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + DataType::List(field) + if matches!( + field.deref(), + Field { + data_type: DataType::Int64, + .. + } + ) => + { + Ok(FieldArray::ListI64(MutableListArray::with_capacity( + CHUNK_SIZE, + ))) + } + + typ => Err(MainError::ArrowDataTypeNotHandled(Box::new(typ.clone()))), + }; + + let fields: Result, MainError> = schema + .fields + .iter() + .map(|field| make_col(field).map_err(|err| err.with_field(Some(name), &field.name))) + .collect(); + let fields = fields?; + + let fixed = FixedCols::new(); + Ok((fixed, fields)) + } + + fn extract_chunk(&mut self) -> Result { + let (mut fixed_cols, mut field_cols) = Self::make_cols(&self.name, &self.fields_schema)?; + + assert_eq!(field_cols.len(), self.field_cols.len()); + core::mem::swap(&mut self.field_cols, &mut field_cols); + core::mem::swap(&mut self.fixed_cols, &mut fixed_cols); + + Ok(Chunk::new( + fixed_cols + .into_arcs() + .chain(field_cols.into_iter().map(|col| col.into_arc())) + .collect(), + )) + } +} + +#[derive(Debug)] +struct TableErrors { + errors: Vec, +} + +// Cap the amount of errors accumulated so we don't end up with ridiculously large memory +// consumption or JSON files +const MAX_EVENT_ERRORS: usize = 64 * 1024; + +impl TableErrors { + fn new() -> Self { + TableErrors { errors: Vec::new() } + } + fn extend_errors>>(&mut self, iter: I) { + let mut len = self.errors.len(); + for res in iter.into_iter() { + if let Err(err) = res { + if len <= MAX_EVENT_ERRORS { + len += 1; + self.errors.push(err); + } + } + } + } +} + 
impl EventWriteState {
    /// Encode `chunk` as a parquet row group and append it to the table's
    /// `FileWriter`.
    ///
    /// Runs on the per-table writer thread spawned in `TableState::new()`,
    /// consuming the chunks sent over the bounded channel. Any arrow2 or I/O
    /// error is propagated to the thread's `Result` and surfaced when the
    /// handle is joined.
    fn dump_to_file(&mut self, chunk: ArrayChunk) -> Result<(), MainError> {
        // Number of chunks (row groups) written so far.
        self.count += 1;

        let row_groups = RowGroupIterator::try_new(
            // A single chunk per row group.
            [Ok(chunk)].into_iter(),
            &self.full_schema,
            self.options,
            // One encoding entry is required per column; plain encoding is
            // used for all of them.
            self.full_schema
                .fields
                .iter()
                .map(|_| vec![Encoding::Plain])
                .collect(),
        )?;

        for group in row_groups {
            let group = group?;
            self.writer.write(group)?;
        }
        Ok(())
    }
}

// fn main2() -> Result<(), ArrowError> {
//     // declare arrays
//     let a = Int8Array::from(&[Some(1), None, Some(3)]);
//     let b = Int32Array::from(&[Some(2), None, Some(6)]);

//     // declare a schema with fields
//     let schema = Schema::from(vec![
//         Field::new("c2", DataType::Int32, true),
//         Field::new("c1", DataType::Int8, true),
//     ]);

//     // declare chunk
//     let chunk = Chunk::new(vec![a.arced(), b.arced()]);

//     // write to parquet (probably the fastest implementation of writing to parquet out there)
//     let options = WriteOptions {
//         write_statistics: false,
//         compression: CompressionOptions::Snappy,
//         version: Version::V1,
//         data_pagesize_limit: None,
//     };

//     let row_groups = RowGroupIterator::try_new(
//         vec![Ok(chunk)].into_iter(),
//         &schema,
//         options,
//         vec![vec![Encoding::Plain], vec![Encoding::Plain]],
//     )?;

//     // anything implementing `std::io::Write` works
//     // let mut file = vec![];
//     let file = File::create("hello.pq").unwrap();
//     let mut writer = FileWriter::try_new(file, schema, options)?;

//     // Write the file.
+// for group in row_groups { +// writer.write(group?)?; +// } +// let _ = writer.end(None)?; +// Ok(()) +// } diff --git a/tools/trace-parser/trace-tools/src/lib/print.rs b/tools/trace-parser/trace-tools/src/lib/print.rs new file mode 100644 index 0000000000..1070fc6f64 --- /dev/null +++ b/tools/trace-parser/trace-tools/src/lib/print.rs @@ -0,0 +1,226 @@ +use std::{io::Write, ops::Deref as _}; + +use traceevent::{ + buffer::{flyrecord, BufferError, EventVisitor}, + cinterp::{BufferEnv, CompileError, Value}, + header::{EventDesc, FieldFmt, Header, HeaderError}, + io::BorrowingRead, + print::{PrintError, StringWriter}, + scratch::ScratchAlloc, +}; + +use crate::error::DynMultiError; + +#[allow(clippy::enum_variant_names)] +#[derive(thiserror::Error, Debug)] +#[non_exhaustive] +pub enum MainError { + #[error("Error while loading data: {0}")] + IoError(#[from] std::io::Error), + + #[error("Error while parsing header: {0}")] + HeaderError(#[from] HeaderError), + + #[error("Error while parsing buffer: {0}")] + BufferError(#[from] BufferError), + + #[error("Error while parsing pretty printing: {0}")] + PrintError(#[from] PrintError), + + #[error("Unexpected type for PID value")] + PIDTypeError, +} + +pub fn print_events( + header: &Header, + reader: R, + mut out: W, +) -> Result<(), DynMultiError> { + let mut nr = 0; + let scratch = &mut ScratchAlloc::new(); + + let buffers = header.buffers(reader).unwrap(); + let buf_id_len = buffers + .iter() + .map(|buf| buf.id.name.len()) + .max() + .unwrap_or(0); + + struct EventCtx<'h> { + pid_fmt: Option<&'h FieldFmt>, + } + impl<'h> EventCtx<'h> { + fn from_event_desc(_header: &'h Header, desc: &'h EventDesc) -> Self { + let get = || { + let struct_fmt = &desc.event_fmt().ok()?.struct_fmt().ok()?; + let pid_fmt = struct_fmt.field_by_name("common_pid")?; + Some(pid_fmt) + }; + let pid_fmt = get(); + + EventCtx { pid_fmt } + } + } + + macro_rules! 
visit { + ($visitor:expr) => {{ + let visitor = $visitor; + nr += 1; + let ts = visitor.timestamp; + let buf_id = visitor.buffer_id; + let header = &visitor.header; + let data = &visitor.data; + let name = &visitor.event_name()?; + let desc = visitor.event_desc()?; + let ctx: &EventCtx = visitor.event_ctx()?; + + let pid_fmt = ctx.pid_fmt.ok_or_else(|| { + let err: BufferError = CompileError::UnknownField("common_pid".into()).into(); + err + })?; + + let pid = visitor.field_by_fmt(pid_fmt)?; + let pid = match pid { + Value::I64Scalar(x) => Ok(x.try_into().unwrap()), + _ => Err(MainError::PIDTypeError), + }?; + + let comm = match pid { + 0 => "", + pid => visitor + .header + .comm_of(pid) + .map(|s| s.deref()) + .unwrap_or("<...>"), + }; + + match buf_id.name.deref() { + "" => write!( + &mut out, + "{: 0 { buf_id_len + 2 } else { 0 } + )?, + name => write!(&mut out, "{name:16}-{pid:<5} [{buf_cpu:0>3}] {ts_sec}.{ts_dec:0>9}: {}:{:>name_pad$} ", + name, + "", + name_pad = 20usize.saturating_sub(name.len()), + )?; + + let env = BufferEnv::new(scratch, header, data); + + // rustc fails to infer Higher Rank Trait Bound (HRTB) for the lifetime of references + // passed as parameters, so we force them to be + fn ensure_hrtb(f: F) -> F + where + F: FnMut(&mut T1, &T2) -> U, + { + f + } + + let mut print_raw = ensure_hrtb(|out: &mut W, visitor: &EventVisitor<_, _>| { + for (fmt, val) in &mut visitor.fields()? 
{ + let val = val?; + // let val = Value::U64Scalar(0xffff800009db03f8); + let derefed = val.deref_ptr(&env); + let val = match derefed { + Ok(val) => val, + Err(_) => { + drop(derefed); + val + } + }; + let field_name = &fmt.declaration.identifier; + match val.to_str() { + Some(s) => write!(out, " {field_name}={s}")?, + None => write!(out, " {field_name}={val}")?, + } + } + Ok(()) + }); + + let mut print_pretty = ensure_hrtb(|out: &mut W, _visitor: &EventVisitor<_, _>| { + let print_fmt = &desc.event_fmt()?.print_fmt()?; + let print_args = &desc.event_fmt()?.print_args()?; + + let print_args = print_args.iter().map(|spec| -> Result<_, PrintError> { + match spec { + Err(err) => Err(err.clone().into()), + // Transmit the error to interpolate_values() so it can display it + // at the right spot. It will then fail with the error after having + // printed to the output. + Ok(eval) => eval.eval(&env).map_err(Into::into), + } + }); + let mut out = StringWriter::new(out); + print_fmt + .interpolate_values(header, &env, &mut out, print_args) + .map_err(MainError::PrintError) + }); + + let pretty = true; + if pretty { + match print_pretty(&mut out, &visitor) { + Ok(x) => Ok(x), + Err(err) => { + write!(&mut out, "ERROR WHILE PRETTY PRINTING: {err}")?; + print_raw(&mut out, &visitor) + } + } + } else { + print_raw(&mut out, &visitor) + } + }}; + } + + macro_rules! print_event { + () => {{ + |record| -> Result<(), MainError> { + let display_err = |out: &mut W, err: MainError| { + write!(out, "ERROR: {err}")?; + Ok(()) + }; + + let res = match record { + Ok(visitor) => { + match visit!(visitor) { + // Ignore any error while printing the event, as any C evaluation error will have + // been embedded in the printed string already. 
+ Err(MainError::PrintError(_)) => Ok(()), + // Recoverable errors that affected one event only + Err(err @ MainError::HeaderError(_) | err @ MainError::BufferError(_)) => { + display_err(&mut out, err) + } + res => res, + } + } + Err(err) => match err { + // Display recoverable errors + err @ BufferError::LostEvents(..) => display_err(&mut out, err.into()), + // Propagate non-recoverable ones + err => Err(err.into()), + }, + }; + + // Reduce the overhead of reseting the scratch allocator. + if (nr % 16) == 0 { + scratch.reset(); + } + writeln!(&mut out)?; + res + } + }}; + } + let res: Result<_, MainError> = + flyrecord(buffers, print_event!(), EventCtx::from_event_desc).map_err(Into::into); + let res = res?.into_iter().collect::>(); + Ok(res?) +} diff --git a/tools/trace-parser/traceevent/.gitignore b/tools/trace-parser/traceevent/.gitignore new file mode 100644 index 0000000000..3765cea38e --- /dev/null +++ b/tools/trace-parser/traceevent/.gitignore @@ -0,0 +1,4 @@ +debug/ +target/ +Cargo.lock +**/*.rs.bk diff --git a/tools/trace-parser/traceevent/Cargo.toml b/tools/trace-parser/traceevent/Cargo.toml new file mode 100644 index 0000000000..5179de8089 --- /dev/null +++ b/tools/trace-parser/traceevent/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "traceevent" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +nom = "7.1" +nom_locate = "4.0" +smartstring = "1.0" +thiserror = "1.0" +memmap2 = "0.9" +deref-map = "0.1.0" +genawaiter = "0.99.1" +bumpalo = {version="3.12", features=["collections", "boxed"]} +# Can be replaced with std::cell::OnceCell the day it's stabilized +once_cell = "1.17" +itertools = "0.12" +bitflags = "2.0" +libdeflater = {version = "1.19", features=["freestanding"]} +bytemuck = "1.13" +thread_local = "1.1" + +[target.'cfg(target_arch = "x86_64")'.dependencies] +zstd = "0.13" + +# zstd fails to (cross) compile so we just use an alternative 
library. However, +# that alternative library is also slower. +# https://github.com/gyscos/zstd-rs/issues/257 +[target.'cfg(not(target_arch = "x86_64"))'.dependencies] +ruzstd = "0.5.0" + diff --git a/tools/trace-parser/traceevent/src/array.rs b/tools/trace-parser/traceevent/src/array.rs new file mode 100644 index 0000000000..06fe7bae20 --- /dev/null +++ b/tools/trace-parser/traceevent/src/array.rs @@ -0,0 +1,126 @@ +use core::{ + borrow::Borrow, + cmp::Ordering, + convert::AsRef, + fmt, + hash::{Hash, Hasher}, + ops::Deref, +}; +use std::sync::Arc; + +#[derive(Debug, Clone)] +pub enum Array<'a, T> { + // Unlike Str, this lacks an Owned variant since it's typically not so + // useful and would either require using heap allocation (slow) or bring in + // an extra dependency like tinyvec. + Borrowed(&'a [T]), + Arc(Arc<[T]>), +} + +impl<'a, T> Array<'a, T> { + #[inline] + pub fn into_static(self) -> Array<'static, T> + where + T: Clone, + { + match self { + Array::Borrowed(slice) => Array::Arc(slice.into()), + Array::Arc(arr) => Array::Arc(arr), + } + } +} + +impl<'a, T> Deref for Array<'a, T> { + type Target = [T]; + + #[inline] + fn deref(&self) -> &Self::Target { + match self { + Array::Borrowed(s) => s, + Array::Arc(s) => s, + } + } +} + +impl<'a, T> AsRef<[T]> for Array<'a, T> { + #[inline] + fn as_ref(&self) -> &[T] { + self.deref() + } +} + +impl<'a, T> From<&'a [T]> for Array<'a, T> { + #[inline] + fn from(arr: &'a [T]) -> Array<'a, T> { + Array::Borrowed(arr) + } +} + +impl<'r, 'a: 'r, T> From<&'r Array<'a, T>> for &'r [T] { + #[inline] + fn from(arr: &'r Array<'a, T>) -> &'r [T] { + arr + } +} + +impl<'a, T> From<&Array<'a, T>> for Vec { + #[inline] + fn from(arr: &Array<'a, T>) -> Vec { + arr.into() + } +} + +impl<'a, T: PartialEq> PartialEq for Array<'a, T> { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl<'a, T: Eq> Eq for Array<'a, T> {} + +impl<'a, T: PartialOrd> PartialOrd for Array<'a, T> { + 
#[inline] + fn partial_cmp(&self, other: &Self) -> Option { + self.deref().partial_cmp(other.deref()) + } +} + +impl<'a, T: Ord> Ord for Array<'a, T> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } +} + +impl<'a, T: Hash> Hash for Array<'a, T> { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(self.deref(), state) + } +} + +impl<'a, T> Borrow<[T]> for Array<'a, T> { + #[inline] + fn borrow(&self) -> &[T] { + self + } +} + +impl<'a, T> fmt::Display for Array<'a, T> +where + T: fmt::Display, +{ + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + write!(f, "[")?; + for (i, x) in self.iter().enumerate() { + if i != 0 { + write!(f, ",")?; + } + fmt::Display::fmt(x, f)?; + } + write!(f, "]")?; + Ok(()) + } +} diff --git a/tools/trace-parser/traceevent/src/buffer.rs b/tools/trace-parser/traceevent/src/buffer.rs new file mode 100644 index 0000000000..e6648fca25 --- /dev/null +++ b/tools/trace-parser/traceevent/src/buffer.rs @@ -0,0 +1,1528 @@ +use core::{ + cell::UnsafeCell, + fmt::{Debug, Formatter}, + marker::PhantomData, + ops::{Deref, DerefMut as _}, +}; +use std::{ + collections::{btree_map::Entry, BTreeMap}, + io, + sync::{Arc, Mutex}, +}; + +use bytemuck::cast_slice; +use deref_map::DerefMap; +use genawaiter::{sync::gen, yield_}; +use once_cell::unsync::OnceCell; +use smartstring::alias::String; + +use crate::{ + array, + cinterp::{Bitmap, BufferEnv, CompileError, SockAddr, SockAddrKind, Value}, + closure::make_closure_coerce_type, + compress::Decompressor, + cparser::{ArrayKind, DynamicKind, Type}, + header::{ + buffer_locations, Abi, BufferId, EventDesc, EventId, FieldFmt, FileSize, Header, + HeaderError, HeaderV6, HeaderV7, LongSize, MemAlign, MemOffset, MemSize, Options, + Signedness, Timestamp, CPU, + }, + io::{BorrowingRead, BorrowingReadCore}, + iterator::MergedIterator, + print::{PrintAtom, PrintFmtStr, PrintPrecision, PrintWidth, PrintArg, VBinSpecifier}, + 
scratch::{ScratchAlloc, ScratchVec}, + str::Str, + error::convert_err_impl, +}; + +// Keep a BTreeMap for fast lookup and to avoid huge Vec allocation in case +// some event ids are very large. Since most traces will contain just a few +// types of events, build up the smallest mapping as it goes. +struct EventDescMap<'h, Ctx, MakeCtx> { + header: &'h Header, + cold_map: BTreeMap, + hot_map: BTreeMap, + make_ctx: Arc>, +} + +impl<'h, Ctx: Debug, MakeCtx> Debug for EventDescMap<'h, Ctx, MakeCtx> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + f.debug_struct("EventDescMap") + .field("cold_map", &self.cold_map) + .field("hot_map", &self.hot_map) + .finish_non_exhaustive() + } +} + +impl<'h, Ctx, MakeCtx> EventDescMap<'h, Ctx, MakeCtx> +where + MakeCtx: FnMut(&'h Header, &'h EventDesc) -> Ctx + 'h, +{ + fn new(header: &'h Header, make_ctx: Arc>) -> Self { + EventDescMap { + header, + cold_map: header + .event_descs() + .into_iter() + .map(|desc| (desc.id, desc)) + .collect(), + hot_map: BTreeMap::new(), + make_ctx, + } + } + #[inline] + fn lookup<'edm>(&'edm mut self, id: EventId) -> Option<(&'h EventDesc, &'edm Ctx)> { + match self.hot_map.entry(id) { + Entry::Occupied(entry) => { + let (desc, ctx) = entry.into_mut(); + Some((*desc, ctx)) + } + Entry::Vacant(entry) => match self.cold_map.remove(&id) { + Some(desc) => { + let mut make_ctx = self.make_ctx.lock().unwrap(); + let (desc, ctx) = entry.insert((desc, make_ctx(self.header, desc))); + Some((*desc, ctx)) + } + None => None, + }, + } + } +} + +pub struct EventVisitor<'i, 'h, 'edm, MakeCtx, Ctx = ()> { + pub data: &'i [u8], + pub header: &'h Header, + + pub timestamp: Timestamp, + pub buffer_id: &'h BufferId, + + // Using UnsafeCell ensures that the compiler understands that anything derived from what we + // stored in it can change at any time, even if the EventVisitor is only manipulated via shared + // ref. 
+ _desc_map: UnsafeCell< + // Using *mut here means EventVisitor is invariant in any lifetime contained in Ctx. + // However, the only values we store in the EventDescMap are either owned by it or have a + // longer lifetime ('h outlives 'edm), so it's sound to be covariant in 'edm. So in + // practice we use 'static but then we cast back to 'h. + *mut EventDescMap<'static, Ctx, MakeCtx>, + >, + // What we really store is: + // &'edm mut EventDescMap<'h, Ctx, MakeCtx>, + // But because of variance limitation, we use *mut instead of &mut and we use 'static instead + // of 'h + _phantom_desc_map: PhantomData<( + &'edm mut EventDescMap<'static, Ctx, MakeCtx>, + &'edm EventDescMap<'h, Ctx, MakeCtx>, + )>, + + scratch: &'i ScratchAlloc, + + // Unfortunately for now OnceCell<'h> makes EventVisitor invariant in 'h: + // https://github.com/matklad/once_cell/issues/167 + // The tracking issue for LazyCell also lists the variance issue: + // https://github.com/rust-lang/rust/issues/109736 + // So to stay covariant in 'h, we use *const instead of &'h. This is fine as we only initialize + // the OnceCell with a value that does live for 'h, as all the inputs of that computation are + // stored when the EventVisitor is created. + event_desc: OnceCell<(*const EventDesc, *const Ctx)>, +} + +impl<'i, 'h, 'edm, MakeCtx, Ctx> EventVisitor<'i, 'h, 'edm, MakeCtx, Ctx> { + fn new( + header: &'h Header, + buffer_id: &'h BufferId, + timestamp: Timestamp, + data: &'i [u8], + scratch: &'i ScratchAlloc, + desc_map: &'edm mut EventDescMap<'h, Ctx, MakeCtx>, + ) -> Self { + // SAFETY: Erase the lifetime 'h and replace by 'static so that we stay covariant in 'h. 
We + // won't be using the desc_map reference past 'h since: + // * 'h outlives 'edm + // * we don't leak self.desc_map anywhere without attaching the 'edm lifetime to what was + // leaked + let desc_map: &'edm mut EventDescMap<'static, Ctx, MakeCtx> = { + let desc_map: &'edm mut EventDescMap<'h, Ctx, MakeCtx> = desc_map; + unsafe { core::mem::transmute(desc_map) } + }; + + EventVisitor { + data, + header, + timestamp, + buffer_id, + scratch, + _desc_map: UnsafeCell::new(desc_map), + event_desc: OnceCell::new(), + _phantom_desc_map: PhantomData, + } + } + + fn __check_covariance_i<'i1>(self) -> EventVisitor<'i1, 'h, 'edm, MakeCtx, Ctx> + where + 'i: 'i1, + { + self + } + fn __check_covariance_h<'h1>(self) -> EventVisitor<'i, 'h1, 'edm, MakeCtx, Ctx> + where + 'h: 'h1, + { + self + } + fn __check_covariance_edm<'edm1>(self) -> EventVisitor<'i, 'h, 'edm1, MakeCtx, Ctx> + where + 'edm: 'edm1, + { + self + } +} + +// Capture a lifetime syntactically to avoid E0700 when using impl in return position +pub trait CaptureLifetime<'a> {} +impl CaptureLifetime<'_> for T {} + +impl<'i, 'h, 'edm, MakeCtx, Ctx> EventVisitor<'i, 'h, 'edm, MakeCtx, Ctx> +where + MakeCtx: 'h + FnMut(&'h Header, &'h EventDesc) -> Ctx, +{ + pub fn fields<'a>( + &'a self, + ) -> Result< + impl Iterator, BufferError>)> + + CaptureLifetime<'h> + + CaptureLifetime<'edm> + + CaptureLifetime<'i>, + BufferError, + > { + let event_desc = self.event_desc()?; + let struct_fmt = &event_desc.event_fmt()?.struct_fmt()?; + let mut fields = struct_fmt.fields.iter(); + + Ok(std::iter::from_fn(move || { + let desc = fields.next()?; + let offset = desc.offset; + let size = desc.size; + let field_data = &self.data[offset..(offset + size)]; + + Some(( + desc, + desc.decoder + .decode(self.data, field_data, self.header, self.scratch), + )) + })) + } + + pub fn field_by_name<'a>( + &'a self, + name: &str, + ) -> Result<(&'a FieldFmt, Value<'a>), BufferError> { + let event_desc = self.event_desc()?; + let struct_fmt = 
&event_desc.event_fmt()?.struct_fmt()?; + let field_fmt = struct_fmt + .field_by_name(name) + .ok_or_else(|| CompileError::UnknownField(name.into()))?; + + let val = self.field_by_fmt(field_fmt)?; + Ok((field_fmt, val)) + } + pub fn field_by_fmt<'a>(&'a self, field_fmt: &FieldFmt) -> Result, BufferError> { + let offset = field_fmt.offset; + let size = field_fmt.size; + let field_data = &self.data[offset..(offset + size)]; + + field_fmt + .decoder + .decode(self.data, field_data, self.header, self.scratch) + } + + pub fn event_id(&self) -> Result { + let parse_u16 = |input| self.header.kernel_abi().parse_u16(input); + let (_, event_id) = parse_u16(self.data)?; + Ok(event_id) + } + + pub fn event_name(&self) -> Result<&str, BufferError> { + let desc = self.event_desc()?; + Ok(&desc.name) + } + + // FIXME: is it sound to derive that &mut EventDescMap from an &self ? Could this lead to + // creating multiple &mut ref alive at the same time ? + fn desc_map(&'edm self) -> &'edm mut EventDescMap<'h, Ctx, MakeCtx> { + // SAFETY: This comes from an &'edm mut reference in the first place, and since + // EventVisitor::new() requires an &'edm mut EventDescMap, we cannot accidentally + // borrow it mutably more than once. This makes it safe to turn it back to an &'edm + // mut. 
+ let desc_map: *mut *mut EventDescMap<'static, Ctx, MakeCtx> = self._desc_map.get(); + let desc_map: &'edm mut EventDescMap<'static, Ctx, MakeCtx> = unsafe { &mut **desc_map }; + let desc_map: &'edm mut EventDescMap<'h, Ctx, MakeCtx> = + unsafe { core::mem::transmute(desc_map) }; + desc_map + } + + fn event_entry(&self) -> Result<(&'h EventDesc, &'edm Ctx), BufferError> { + self.event_desc + .get_or_try_init(|| { + let event_id = self.event_id()?; + let not_found = || BufferError::EventDescriptorNotFound(event_id); + let (desc, ctx) = self.desc_map().lookup(event_id).ok_or_else(not_found)?; + Ok((desc, ctx)) + }) + .map(|(desc, ctx)| { + let ctx: *const Ctx = *ctx; + let desc: *const EventDesc = *desc; + // SAFETY: EventDescMap::lookup() returns (&'h EventDesc, &'edm Ctx), which we store + // as (*const EventDesc, *const Ctx) to avoid variance issues. It's therefore + // completely safe to just cast it back to &'h EventDesc. + let desc: &'h EventDesc = unsafe { &*desc }; + let ctx: &'edm Ctx = unsafe { &*ctx }; + (desc, ctx) + }) + } + + pub fn event_desc(&self) -> Result<&'h EventDesc, BufferError> { + Ok(self.event_entry()?.0) + } + + pub fn event_ctx(&self) -> Result<&'edm Ctx, BufferError> { + Ok(self.event_entry()?.1) + } + + #[inline] + pub fn buffer_env(&self) -> BufferEnv { + BufferEnv::new(self.scratch, self.header, self.data) + } + + #[inline] + pub fn vbin_fields<'a>( + &self, + print_fmt: &'a PrintFmtStr, + data: &'a [u32], + ) -> impl IntoIterator, BufferError>> + where + 'h: 'a, + 'i: 'a, + { + print_fmt.vbin_fields(self.header, self.scratch, data) + } +} + +pub trait FieldDecoder: Send + Sync { + fn decode<'d>( + &self, + event_data: &'d [u8], + field_data: &'d [u8], + header: &'d Header, + bump: &'d ScratchAlloc, + ) -> Result, BufferError>; +} + +impl FieldDecoder for T +where + T: for<'d> Fn( + &'d [u8], + &'d [u8], + &'d Header, + &'d ScratchAlloc, + ) -> Result, BufferError> + + Send + + Sync, +{ + fn decode<'d>( + &self, + event_data: &'d 
[u8], + field_data: &'d [u8], + header: &'d Header, + bump: &'d ScratchAlloc, + ) -> Result, BufferError> { + self(event_data, field_data, header, bump) + } +} + +impl FieldDecoder for () { + fn decode<'d>( + &self, + _event_data: &'d [u8], + _field_data: &'d [u8], + _header: &'d Header, + _bump: &'d ScratchAlloc, + ) -> Result, BufferError> { + Err(BufferError::NoDecoder) + } +} + +impl Type { + #[allow(clippy::type_complexity)] + #[inline] + pub fn make_decoder( + &self, + header: &Header, + ) -> Result< + Box< + dyn for<'d> Fn( + &'d [u8], + &'d [u8], + &'d Header, + &'d ScratchAlloc, + ) -> Result, BufferError> + + Send + + Sync, + >, + CompileError, + > { + use Type::*; + + let dynamic_decoder = |kind: &DynamicKind| -> Box< + dyn for<'d> Fn( + &'d [u8], + &'d [u8], + &'d Header, + &'d ScratchAlloc, + ) -> Result<&'d [u8], BufferError> + + Send + + Sync, + > { + match kind { + DynamicKind::Dynamic => Box::new( + move |data: &[u8], field_data: &[u8], header: &Header, _scratch| { + let offset_and_size = header.kernel_abi().parse_u32(field_data)?.1; + let offset: usize = (offset_and_size & 0xffff).try_into().unwrap(); + let size: usize = (offset_and_size >> 16).try_into().unwrap(); + Ok(&data[offset..(offset + size)]) + }, + ), + DynamicKind::DynamicRel => Box::new( + move |data: &[u8], field_data: &[u8], header: &Header, _scratch| { + let (remainder, offset_and_size) = + header.kernel_abi().parse_u32(field_data)?; + let next_field_offset = + remainder.as_ptr() as usize - data.as_ptr() as usize; + + let offset: usize = (offset_and_size & 0xffff).try_into().unwrap(); + let size: usize = (offset_and_size >> 16).try_into().unwrap(); + + let offset = next_field_offset + offset; + Ok(&data[offset..(offset + size)]) + }, + ), + } + }; + + match self { + Void => Ok(Box::new(|_, _, _, _| Ok(Value::Unknown))), + Bool => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::U64Scalar( + header.kernel_abi().parse_u8(field_data)?.1.into(), + )) + })), + U8 => 
Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::U64Scalar( + header.kernel_abi().parse_u8(field_data)?.1.into(), + )) + })), + I8 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::I64Scalar( + (header.kernel_abi().parse_u8(field_data)?.1 as i8).into(), + )) + })), + + U16 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::U64Scalar( + header.kernel_abi().parse_u16(field_data)?.1.into(), + )) + })), + I16 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::I64Scalar( + (header.kernel_abi().parse_u16(field_data)?.1 as i16).into(), + )) + })), + + U32 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::U64Scalar( + header.kernel_abi().parse_u32(field_data)?.1.into(), + )) + })), + I32 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::I64Scalar( + (header.kernel_abi().parse_u32(field_data)?.1 as i32).into(), + )) + })), + + U64 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::U64Scalar( + header.kernel_abi().parse_u64(field_data)?.1, + )) + })), + I64 => Ok(Box::new(move |_data, field_data, header, _| { + Ok(Value::I64Scalar( + header.kernel_abi().parse_u64(field_data)?.1 as i64, + )) + })), + + Pointer(_) => match header.kernel_abi().long_size { + LongSize::Bits32 => U32.make_decoder(header), + LongSize::Bits64 => U64.make_decoder(header), + }, + Typedef(typ, _) | Enum(typ, _) => typ.make_decoder(header), + + DynamicScalar(typ, kind) => { + let decoder = dynamic_decoder(kind); + match typ.deref() { + // Bitmaps created using DECLARE_BITMAP() macro in include/linux/types.h + Type::Typedef(_, id) + if matches!( + id.deref(), + "cpumask_t" | "dma_cap_mask_t" | "nodemask_t" | "pnp_irq_mask_t" + ) => + { + Ok(Box::new( + move |data, field_data: &[u8], header: &Header, scratch| { + let field_data = decoder(data, field_data, header, scratch)?; + Ok(Value::Bitmap(Bitmap::from_bytes( + field_data, + header.kernel_abi(), + ))) + }, + )) + } + + // As described in: + // 
https://bugzilla.kernel.org/show_bug.cgi?id=217532 + Type::Typedef(_, id) if id.deref() == "sockaddr_t" => Ok(Box::new( + move |data, field_data: &[u8], header: &Header, scratch| { + let field_data = decoder(data, field_data, header, scratch)?; + Ok(Value::SockAddr(SockAddr::from_bytes( + field_data, + header.kernel_abi().endianness, + SockAddrKind::Full, + )?)) + }, + )), + + // Any other dynamic scalar type is unknown, so just provide + // the raw buffer to consumers. + _ => { + let typ = Arc::from(typ.clone()); + Ok(Box::new(move |data, field_data, header, scratch| { + let field_data = decoder(data, field_data, header, scratch)?; + Ok(Value::Raw( + Arc::clone(&typ), + array::Array::Borrowed(field_data), + )) + })) + } + } + } + + Array(typ, ArrayKind::Dynamic(kind)) => { + let data_decoder = dynamic_decoder(kind); + let array_decoder = + Type::Array(typ.clone(), ArrayKind::Fixed(Ok(0))).make_decoder(header)?; + + Ok(Box::new(move |data, field_data, header, scratch| { + let array_data = data_decoder(data, field_data, header, scratch)?; + array_decoder.decode(data, array_data, header, scratch) + })) + } + + Array(typ, ArrayKind::ZeroLength) => { + let decoder = + Type::Array(typ.clone(), ArrayKind::Fixed(Ok(0))).make_decoder(header)?; + + Ok(Box::new(move |data, field_data, header, scratch| { + let offset: usize = field_data.as_ptr() as usize - data.as_ptr() as usize; + // Currently, ZLA fields are buggy as we cannot know the + // true data size. Instead, we get this aligned size, + // which can includes padding junk at the end of the event: + // https://bugzilla.kernel.org/show_bug.cgi?id=210173 + let array_data = &data[offset..]; + decoder.decode(data, array_data, header, scratch) + })) + } + + Array(typ, ArrayKind::Fixed(_)) => { + let item_size = typ.size(header.kernel_abi())?; + let item_size: usize = item_size.try_into().unwrap(); + + macro_rules! 
parse_scalar { + ($ctor:tt, $item_ty:ty, $parse_item:ident) => {{ + if header.kernel_abi().endianness.is_native() { + Box::new(move |_data, field_data: &[u8], header, scratch| { + match bytemuck::try_cast_slice(field_data) { + Ok(slice) => Ok(Value::$ctor(array::Array::Borrowed(slice))), + // Data is either misaligned or the array + // size is not a multiple of the item size. + Err(_) => { + let mut vec = ScratchVec::with_capacity_in( + field_data.len() / item_size, + scratch, + ); + let item_parser = + |item| header.kernel_abi().$parse_item(item); + for item in field_data.chunks_exact(item_size) { + let item = item_parser(item)?.1 as $item_ty; + vec.push(item) + } + Ok(Value::$ctor(array::Array::Borrowed(vec.leak()))) + } + } + }) + } else { + Box::new(move |_data, field_data: &[u8], header, scratch| { + let mut vec = ScratchVec::with_capacity_in( + field_data.len() / item_size, + scratch, + ); + match bytemuck::try_cast_slice::<_, $item_ty>(field_data) { + Ok(slice) => { + vec.extend(slice.into_iter().map(|x| x.swap_bytes())); + + // Leak the bumpalo's Vec, which is fine because + // we will deallocate it later by calling + // ScratchAlloc::reset(). Note that Drop impl for items + // will _not_ run. + // + // In order for them to run, we would need to + // return an Vec<> instead of a slice, which + // will be possible one day when the unstable + // allocator_api becomes stable so we can + // allocate a real Vec<> in the ScratchAlloc and simply + // return it. + Ok(Value::$ctor(array::Array::Borrowed(vec.leak()))) + } + // Data is either misaligned or the array + // size is not a multiple of the item size. 
+ Err(_) => { + let item_parser = + |item| header.kernel_abi().$parse_item(item); + for item in field_data.chunks_exact(item_size) { + let item = item_parser(item)?.1 as $item_ty; + let item = item.swap_bytes(); + vec.push(item) + } + Ok(Value::$ctor(array::Array::Borrowed(vec.leak()))) + } + } + }) + } + }}; + } + + match typ.resolve_wrapper() { + Type::Bool => Ok(parse_scalar!(U8Array, u8, parse_u8)), + + Type::U8 => Ok(parse_scalar!(U8Array, u8, parse_u8)), + Type::I8 => Ok(parse_scalar!(I8Array, i8, parse_u8)), + + Type::U16 => Ok(parse_scalar!(U16Array, u16, parse_u16)), + Type::I16 => Ok(parse_scalar!(I16Array, i16, parse_u16)), + + Type::U32 => Ok(parse_scalar!(U32Array, u32, parse_u32)), + Type::I32 => Ok(parse_scalar!(I32Array, i32, parse_u32)), + + Type::U64 => Ok(parse_scalar!(U64Array, u64, parse_u64)), + Type::I64 => Ok(parse_scalar!(I64Array, i64, parse_u64)), + + Type::Pointer(_) => match header.kernel_abi().long_size { + LongSize::Bits32 => Ok(parse_scalar!(U32Array, u32, parse_u32)), + LongSize::Bits64 => Ok(parse_scalar!(U64Array, u64, parse_u64)), + }, + + _ => Err(CompileError::InvalidArrayItem(typ.deref().clone())), + } + } + typ => { + let typ = Arc::new(typ.clone()); + Ok(Box::new(move |_data, field_data, _, _| { + Ok(Value::Raw( + Arc::clone(&typ), + array::Array::Borrowed(field_data), + )) + })) + } + } + } +} + +use core::cmp::Ordering; +#[derive(Debug)] +struct BufferItem<'a, Ctx, MakeCtx>( + Result< + ( + &'a Header, + &'a mut EventDescMap<'a, Ctx, MakeCtx>, + &'a BufferId, + Timestamp, + &'a [u8], + ), + BufferError, + >, +); + +impl<'a, Ctx, MakeCtx> PartialEq for BufferItem<'a, Ctx, MakeCtx> { + #[inline] + fn eq(&self, other: &Self) -> bool { + match (&self.0, &other.0) { + (Ok(x), Ok(y)) => x.3 == y.3, + _ => std::ptr::eq(self, other), + } + } +} + +impl<'a, Ctx, MakeCtx> Eq for BufferItem<'a, Ctx, MakeCtx> {} + +impl<'a, Ctx, MakeCtx> PartialOrd for BufferItem<'a, Ctx, MakeCtx> { + #[inline] + fn partial_cmp(&self, other: &Self) -> 
Option { + Some(self.cmp(other)) + } +} + +impl<'a, Ctx, MakeCtx> Ord for BufferItem<'a, Ctx, MakeCtx> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + match (&self.0, &other.0) { + // Compare based on timestamp, then on CPU ID to match the same order as trace-cmd + // report + (Ok(x), Ok(y)) => Ord::cmp(&(x.3, x.2.cpu), &(y.3, y.2.cpu)), + // Consider an error to be like the smallest timestamp possible. This ensures that + // errors are propagated as soon as they are encountered in the buffer + (Err(_), Ok(_)) => Ordering::Less, + (Ok(_), Err(_)) => Ordering::Greater, + _ => Ordering::Equal, + } + } +} + +pub struct Buffer<'i, 'h> { + header: &'h Header, + pub id: BufferId, + page_size: MemSize, + reader: Box + Send>, +} + +impl<'i, 'h> Buffer<'i, 'h> { + // Keep BufferBorrowingRead an implementation detail for now in case we + // need something more powerful than BufferBorrowingRead in the future. + pub fn new( + id: BufferId, + reader: I, + page_size: MemSize, + header: &'h Header, + ) -> Self { + Buffer { + id, + reader: Box::new(reader), + page_size, + header, + } + } +} + +impl HeaderV7 { + pub(crate) fn buffers<'i, 'h, 'a: 'i + 'h, I: BorrowingRead + Send + 'i>( + &'a self, + header: &'h Header, + input: I, + ) -> Result>, BufferError> { + self.options + .iter() + .filter_map(|option| match option { + Options::Buffer { + cpu, + name, + offset, + size, + page_size, + decomp, + } => { + let make_buffer = || -> Result, BufferError> { + // At some point, trace-cmd was creating files with a + // broken size for compressed section: the real size was + // + 4. Since this has been + // fixed and there is no way to distinguish if the file + // is affected, we simply ignore the size when + // compression is used. This is not a major problem as + // the compression header contains a chunk count that + // will be used to know when to stop reading anyway. 
+ // + // However, non-compressed buffers still rely on the + // recorded size to known when EOF is reached, so we + // preserve the value. + // https://bugzilla.kernel.org/show_bug.cgi?id=217367 + let size = if decomp.is_some() { None } else { Some(*size) }; + + let reader = input.clone_and_seek(*offset, size)?; + let reader: Box = match decomp { + Some(decomp) => Box::new(DecompBorrowingReader::new( + &self.kernel_abi, + decomp, + reader, + )?), + None => Box::new(reader), + }; + Ok(Buffer { + id: BufferId { + cpu: *cpu, + name: name.clone(), + }, + reader, + page_size: *page_size, + header, + }) + }; + Some(make_buffer()) + } + _ => None, + }) + .collect() + } +} + +impl HeaderV6 { + pub(crate) fn buffers<'i, 'h, 'a: 'i + 'h, I: BorrowingRead + Send + 'i>( + &'a self, + header: &'h Header, + input: I, + ) -> Result>, BufferError> { + let nr_cpus = self.nr_cpus; + let abi = &self.kernel_abi; + let instances = self.options.iter().filter_map(|opt| match opt { + Options::Instance { name, offset } => { + eprintln!("INSTANCE BUFFER OPTION {name:?} {offset}"); + Some((name.clone(), *offset)) + } + _ => None, + }); + + enum LocId { + TopLevelInstanceCPU(CPU), + Instance(String), + } + + let locs = self + .top_level_buffer_locations + .iter() + .enumerate() + .map(|(cpu, loc)| { + ( + LocId::TopLevelInstanceCPU(cpu.try_into().unwrap()), + loc.offset, + Some(loc.size), + ) + }) + .chain(instances.map(|(name, offset)| (LocId::Instance(name), offset, None))); + + let buffers = locs.map(|(loc_id, offset, size)| { + let mut reader = input.clone_and_seek(offset, size)?; + let page_size = self.page_size.try_into().unwrap(); + match loc_id { + LocId::TopLevelInstanceCPU(cpu) => Ok(vec![Buffer { + id: BufferId { + cpu, + name: "".into(), + }, + page_size, + reader: Box::new(reader), + header, + }]), + LocId::Instance(name) => { + let data_kind = reader.read_null_terminated()?.to_owned(); + buffer_locations(&data_kind, nr_cpus, abi, &name, &mut reader)? 
+ .into_iter() + .map(|loc| { + Ok(Buffer { + id: loc.id, + reader: Box::new(input.clone_and_seek(loc.offset, Some(loc.size))?), + page_size, + header, + }) + }) + .collect::, _>>() + } + } + }); + let buffers = buffers.collect::, BufferError>>()?; + Ok(buffers.into_iter().flatten().collect()) + } +} + +#[inline] +unsafe fn transmute_lifetime<'b, T: ?Sized>(x: &T) -> &'b T { + core::mem::transmute(x) +} + +#[inline] +unsafe fn transmute_lifetime_mut<'b, T: ?Sized>(x: &mut T) -> &'b mut T { + core::mem::transmute(x) +} + +pub fn flyrecord<'i, 'h, R, F, IntoIter, MakeCtx, Ctx>( + buffers: IntoIter, + mut f: F, + make_ctx: MakeCtx, +) -> Result + 'h + CaptureLifetime<'i>, BufferError> +where + IntoIter: IntoIterator>, + F: 'h + + for<'i1, 'edm> FnMut(Result, BufferError>) -> R, + MakeCtx: 'h + FnMut(&'h Header, &'h EventDesc) -> Ctx, + Ctx: 'h, + 'i: 'h, +{ + let make_ctx = Arc::new(Mutex::new(make_ctx)); + + macro_rules! make_record_iter { + ($buffer:expr) => {{ + let mut buffer = $buffer; + let buf_id = buffer.id; + let header = buffer.header; + let timestamp_fixer = header.timestamp_fixer(); + let make_ctx = Arc::clone(&make_ctx); + + // Each buffer will have its own hot map which is not ideal, but the + // maps contain &EventDesc so the descriptor itself actually lives + // in the header and is shared. This ensures we will not parse event + // format more than once, which is the main cost here. 
+ let mut desc_map = EventDescMap::new(header, make_ctx); + gen!({ + loop { + match extract_page(header, &buf_id, &mut buffer.reader, buffer.page_size) { + Ok(Some((data, mut timestamp, recoverable_err))) => { + if let Some(err) = recoverable_err { + yield_!(BufferItem(Err(err))) + } + + let mut data = &*data; + while data.len() != 0 { + match parse_record(header, data, timestamp) { + Ok((remaining, timestamp_, record)) => { + timestamp = timestamp_; + data = remaining; + match record { + Ok(BufferRecord::Event(data)) => { + // SAFETY: That yielded &[u8] will + // only stay valid until the next + // time next() is called on the + // iterator. MergedIterator + // specifically guarantees to not + // call next() on inner iterators + // before its own next() is called. + // + // Note that this is not the case + // with e.g. itertools kmerge_by() + // method. + let data = unsafe { transmute_lifetime(data) }; + let buf_id_ref = + unsafe { transmute_lifetime(&buf_id) }; + let desc_map_ref = unsafe { + transmute_lifetime_mut(&mut desc_map) + }; + yield_!(BufferItem(Ok(( + header, + desc_map_ref, + buf_id_ref, + timestamp_fixer(timestamp), + data + )))); + } + _ => (), + } + } + Err(err) => { + yield_!(BufferItem(Err(err.into()))); + break; + } + } + } + } + Ok(None) => break, + Err(err) => { + yield_!(BufferItem(Err(err))); + break; + } + } + } + }) + }}; + } + + let iterators = buffers.into_iter().map(|buffer| make_record_iter!(buffer)); + // Buffer used to reorder array data in case the trace does not have native + // endianness. 
+ let mut visitor_scratch = ScratchAlloc::new(); + + match MergedIterator::new(iterators) { + Some(merged) => { + Ok(merged.enumerate().map(move |(i, x)| match x { + BufferItem(Ok((header, desc_map, buffer_id, timestamp, data))) => { + let visitor = EventVisitor::new( + header, + buffer_id, + timestamp, + data, + &visitor_scratch, + desc_map, + ); + let x = f(Ok(visitor)); + // Reset the scratch allocator, thereby freeing any value allocated in + // it (without running their Drop implementation). + // + // Reduce the overhead of reseting the scratch allocator. + if (i % 16) == 0 { + visitor_scratch.reset(); + } + x + } + BufferItem(Err(err)) => f(Err(err)), + })) + } + None => Err(BufferError::NoRingBuffer), + } +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum BufferError { + #[error("Header contains not ring buffer reference")] + NoRingBuffer, + + #[error("Some events were lost in buffer {0:?}: {1:?}")] + LostEvents(BufferId, Option), + + #[error("Page data too large to be parsed: {0}")] + PageDataTooLarge(u64), + + #[error("Event descriptor for event ID {0} was not found")] + EventDescriptorNotFound(EventId), + + #[error("Too many CPUs in the system, CPU ID cannot be represented")] + TooManyCpus, + + #[error("Could not compute the array size")] + UnknownArraySize, + + #[error("struct sockaddr buffer was too small to decode")] + SockAddrTooSmall, + + #[error("Unknown socket family code: {0}")] + UnknownSockAddrFamily(u16), + + #[error("No decoder for that field")] + NoDecoder, + + #[error("I/O error while loading data: {0}")] + IoError(Box), + + #[error("Error while parsing header: {0}")] + HeaderError(Box), + + #[error("Compilation error while loading data: {0}")] + CompileError(Box), +} + +convert_err_impl!(HeaderError, HeaderError, BufferError); +convert_err_impl!(CompileError, CompileError, BufferError); +convert_err_impl!(io::ErrorKind, IoError, BufferError); + +impl From for BufferError { + fn from(err: io::Error) 
-> BufferError { + err.kind().into() + } +} + +trait BufferBorrowingRead<'a> +where + Self: 'a + Send, +{ + fn read(&mut self, count: MemSize) -> io::Result<&[u8]>; +} + +impl<'a> BufferBorrowingRead<'a> for Box + Send> { + #[inline] + fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { + self.deref_mut().read(count) + } +} + +impl<'a, R> BufferBorrowingRead<'a> for R +where + R: BorrowingRead + Send + 'a, +{ + #[inline] + fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { + ::read(self, count) + } +} + +struct DecompBorrowingReader<'a, I, D> { + abi: &'a Abi, + inner: I, + decomp: &'a D, + remaining_chunks: u32, + + // Buffer used to decompress data into. It will not incur lots of + // allocations in the hot path since it will be reused once it reaches the + // appropriate size. + buffer: Vec, + + // current offset in the data. + offset: MemOffset, +} + +impl<'a, I, D> DecompBorrowingReader<'a, I, D> +where + D: Decompressor + 'a, + I: BorrowingRead, +{ + fn new(abi: &'a Abi, decomp: &'a D, mut reader: I) -> io::Result { + let nr_chunks: u32 = reader.read_int(abi.endianness)?; + + Ok(DecompBorrowingReader { + abi, + decomp, + inner: reader, + remaining_chunks: nr_chunks, + buffer: Vec::new(), + offset: 0, + }) + } +} + +impl<'a, I, D> BufferBorrowingRead<'a> for DecompBorrowingReader<'a, I, D> +where + I: BorrowingRead + Send + 'a, + D: Decompressor + Send + 'a, +{ + fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { + let len = self.buffer.len(); + let offset = self.offset; + + if offset + count > len { + if self.remaining_chunks == 0 { + self.offset = len; + Err(io::ErrorKind::UnexpectedEof.into()) + } else { + // Move the non-read data at the beginning of the vec, so we can + // just reuse that allocation inplace. 
+ let new_len = len - offset; + self.buffer.copy_within(offset..len, 0); + self.buffer.truncate(new_len); + self.offset = 0; + + while self.buffer.len() < count { + if self.remaining_chunks == 0 { + return Err(io::ErrorKind::UnexpectedEof.into()); + } else { + self.remaining_chunks -= 1; + + let compressed_count: u32 = self.inner.read_int(self.abi.endianness)?; + let compressed_count: usize = compressed_count.try_into().unwrap(); + + let decompressed_count: u32 = self.inner.read_int(self.abi.endianness)?; + let decompressed_count: usize = decompressed_count.try_into().unwrap(); + + let compressed = self.inner.read(compressed_count)?; + + let len = self.buffer.len(); + self.buffer.resize(len + decompressed_count, 0); + + self.decomp.decompress_into( + compressed, + &mut self.buffer[len..len + decompressed_count], + )?; + } + } + + self.offset = count; + Ok(&self.buffer[..count]) + } + } else { + self.offset += count; + Ok(&self.buffer[offset..offset + count]) + } + } +} + +fn extract_page<'a, 'b: 'a, 'h, I>( + header: &'h Header, + buf_id: &'a BufferId, + input: &'a mut I, + page_size: MemSize, +) -> Result< + Option<( + impl Deref + 'a, + Timestamp, + Option, + )>, + BufferError, +> +where + I: BufferBorrowingRead<'b>, +{ + let parse_u32 = |input| header.kernel_abi().parse_u32(input); + let parse_u64 = |input| header.kernel_abi().parse_u64(input); + assert!(page_size % 2 == 0); + let data_size_mask = (1u64 << 27) - 1; + let missing_events_mask = 1u64 << 31; + let missing_events_stored_mask = 1u64 << 30; + + let page_data = match input.read(page_size) { + Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), + x => x, + }?; + let data = &page_data; + let remaining = data.len(); + + let (data, timestamp) = parse_u64(data)?; + + let (data, commit) = match header.kernel_abi().long_size { + LongSize::Bits64 => parse_u64(data), + LongSize::Bits32 => parse_u32(data).map(|(data, x)| (data, x.into())), + }?; + + let data_size = data_size_mask & commit; 
+ let data_size: usize = data_size + .try_into() + .map_err(|_| BufferError::PageDataTooLarge(data_size))?; + + let consumed = remaining - data.len(); + let has_missing_events = (commit & missing_events_mask) != 0; + let recoverable_err = if has_missing_events { + let has_missing_events_stored = (commit & missing_events_stored_mask) != 0; + let nr_missing = if has_missing_events_stored { + let data = &data[data_size..]; + let nr = match header.kernel_abi().long_size { + LongSize::Bits32 => parse_u32(data)?.1.into(), + LongSize::Bits64 => parse_u64(data)?.1, + }; + Some(nr) + } else { + None + }; + Some(BufferError::LostEvents(buf_id.clone(), nr_missing)) + } else { + None + }; + let data = DerefMap::new(page_data, move |data| { + &data[consumed..(data_size + consumed)] + }); + Ok(Some((data, timestamp, recoverable_err))) +} + +#[derive(Debug)] +enum BufferRecord<'a> { + Event(&'a [u8]), + Timestamp(Timestamp), + TimeExtend(Timestamp), + Padding(FileSize), +} + +#[inline] +fn take(input: &[u8], count: usize) -> io::Result<(&[u8], &[u8])> { + let data = input + .get(..count) + .ok_or(io::Error::from(io::ErrorKind::UnexpectedEof))?; + Ok((&input[count..], data)) +} + +fn parse_record<'a>( + header: &Header, + input: &'a [u8], + timestamp: Timestamp, +) -> io::Result<(&'a [u8], Timestamp, Result, BufferError>)> { + let parse_u32 = |input| header.kernel_abi().parse_u32(input); + + let (input, record_header) = parse_u32(input)?; + let record_header: u64 = record_header.into(); + + let typ = record_header & ((1 << 5) - 1); + let delta = record_header >> 5; + + match typ { + // Padding + 29 => { + let (input, len) = parse_u32(input)?; + let len = len.saturating_sub(4); + let len_usize: usize = len.try_into().unwrap(); + // For some reason the len is sometimes incorrect and larger than the remaining input. 
+ let input = input.get(len_usize..).unwrap_or(&[]); + Ok((input, timestamp, Ok(BufferRecord::Padding(len.into())))) + } + // Time extend + 30 => { + let (input, x) = parse_u32(input)?; + let x: u64 = x.into(); + + let extend = delta + (x << 27); + Ok(( + input, + timestamp + extend, + Ok(BufferRecord::TimeExtend(extend)), + )) + } + // Timestamp + 31 => { + let msb = timestamp & (0xf8u64 << 56); + let (input, x) = parse_u32(input)?; + let x: u64 = x.into(); + let timestamp: Timestamp = delta + (x << 27); + let timestamp = timestamp | msb; + Ok((input, timestamp, Ok(BufferRecord::Timestamp(timestamp)))) + } + // Event + _ => { + let alignment = 4; + let (input, size, _padding) = match typ { + 0 => { + let (input, size) = parse_u32(input)?; + // The size includes the size itself + let size = size - 4; + // Align the size on the event array item alignment. Since + // it's a array of 32bit ints, we align on 4. + let aligned = size & !(alignment - 1); + let padding = aligned - size; + (input, size.into(), padding) + } + // Currently, ZLA fields are buggy as we cannot know the + // true data size. Instead, we get this aligned size, which + // can includes padding junk: + // https://bugzilla.kernel.org/show_bug.cgi?id=210173 + _ => { + let alignment: u64 = alignment.into(); + (input, typ * alignment, 0) + } + }; + + let (input, data) = take(input, size.try_into().unwrap())?; + + Ok((input, timestamp + delta, Ok(BufferRecord::Event(data)))) + } + } +} + +impl PrintFmtStr { + fn vbin_decoders<'a>(&'a self, header: &'a Header) -> &Vec { + let abi = header.kernel_abi(); + let char_signedness = abi.char_signedness; + self.vbin_decoders.get_or_init(|| { + make_closure_coerce_type!( + decoder_hrtb, + Arc< + dyn for<'a> Fn( + &'a [u8], + &'a Header, + ) + -> Result<(&'a [u8], Value<'a>), BufferError> + + Send + + Sync, + > + ); + + macro_rules! 
scalar_parser { + ($decoder:ident, $typ:ty, $ctor:ident, $align:expr) => { + ( + $align, + decoder_hrtb(Arc::new( + move |data: &[u8], + header: &Header| + -> Result<(&[u8], Value<'_>), BufferError> { + let (remaining, x) = header.kernel_abi().$decoder(data)?; + Ok((remaining, Value::$ctor((x as $typ).into()))) + }, + )), + ) + }; + } + let atom_decoder = |vbin_spec: &_| match vbin_spec { + VBinSpecifier::U8 => scalar_parser!(parse_u8, u8, U64Scalar, 1), + VBinSpecifier::I8 => scalar_parser!(parse_u8, i8, I64Scalar, 1), + + VBinSpecifier::U16 => scalar_parser!(parse_u16, u16, U64Scalar, 2), + VBinSpecifier::I16 => scalar_parser!(parse_u16, i16, I64Scalar, 2), + + VBinSpecifier::U32 => scalar_parser!(parse_u32, u32, U64Scalar, 4), + VBinSpecifier::I32 => scalar_parser!(parse_u32, i32, I64Scalar, 4), + + VBinSpecifier::U64 => scalar_parser!(parse_u64, u64, U64Scalar, 4), + VBinSpecifier::I64 => scalar_parser!(parse_u64, i64, I64Scalar, 4), + + VBinSpecifier::Str => ( + 1, + decoder_hrtb(Arc::new(move |data: &[u8], _header| { + match data.iter().position(|x| *x == 0) { + None => Err(io::ErrorKind::UnexpectedEof.into()), + Some(pos) => Ok(( + &data[pos + 1..], + match core::str::from_utf8(&data[..pos]) { + Ok(s) => Value::Str(Str::new_borrowed(s)), + Err(_) => match char_signedness { + Signedness::Unsigned => { + Value::U8Array(array::Array::Borrowed(&data[..pos + 1])) + } + Signedness::Signed => { + let slice: &[i8] = cast_slice(&data[..pos + 1]); + Value::I8Array(array::Array::Borrowed(slice)) + } + }, + }, + )), + } + })), + ), + }; + + self.atoms + .iter() + .filter_map(|atom| { + let (alignment, decode) = match atom { + PrintAtom::Variable { vbin_spec, .. 
} => Some(atom_decoder(vbin_spec)), + _ => None, + }?; + Some(VBinDecoder { + atom: atom.clone(), + alignment, + decode, + }) + }) + .collect() + }) + } + + #[inline] + pub fn vbin_fields<'a>( + &'a self, + header: &'a Header, + scratch: &'a ScratchAlloc, + input: &'a [u32], + ) -> impl IntoIterator, BufferError>> { + let mut i = 0; + let mut decoders = self.vbin_decoders(header).iter(); + let mut failed = false; + let align = |x: usize, align: usize| (x + (align - 1)) & !(align - 1); + + let input = if header.kernel_abi().endianness.is_native() { + input + } else { + // The decoding of the [u32] will have swapped bytes to be in our native order, so we + // need to put it back in the kernel's order before trying to decode. Then within that + // reconstructed [u8] we can parse stuff as usual, following kernel endianness. This + // is because despite the buffer being advertised as a [u32] by the kernel, it is + // actually manipulated as a [u8] (see vbin_printf() implementation) + let mut swapped_input = ScratchVec::with_capacity_in(input.len(), scratch); + for x in input { + swapped_input.push((*x).swap_bytes()) + } + swapped_input.leak() + }; + let input = bytemuck::cast_slice(input); + + std::iter::from_fn(move || { + if failed { + return None; + } + + let decoder = decoders.next()?; + + macro_rules! handle_err { + ($res:expr) => { + match $res { + Ok(x) => x, + Err(err) => { + failed = true; + return Some(Err(err.into())); + } + } + }; + } + + macro_rules! update_i { + ($remaining:expr) => { + i = $remaining.as_ptr() as usize - input.as_ptr() as usize; + }; + } + + let (width, precision) = match &decoder.atom { + PrintAtom::Variable { + width: width_kind, + precision: precision_kind, + .. 
+ } => { + let abi = &header.kernel_abi(); + let mut decode_u32 = |data: &[u8]| -> io::Result { + let (remaining, x) = abi.parse_u32(&data[align(i, 4)..])?; + update_i!(remaining); + Ok(x) + }; + ( + if width_kind == &PrintWidth::Dynamic { + Some(handle_err!(decode_u32(input)).try_into().unwrap()) + } else { + None + }, + if precision_kind == &PrintPrecision::Dynamic { + Some(handle_err!(decode_u32(input)).try_into().unwrap()) + } else { + None + }, + ) + } + _ => (None, None), + }; + + let j = align(i, decoder.alignment); + let (remaining, value) = handle_err!((decoder.decode)(&input[j..], header)); + update_i!(remaining); + + Some(Ok(PrintArg { + value, + width, + precision, + })) + }) + } +} + +#[derive(Clone)] +pub struct VBinDecoder { + atom: PrintAtom, + alignment: MemAlign, + #[allow(clippy::type_complexity)] + decode: Arc< + dyn for<'a> Fn(&'a [u8], &'a Header) -> Result<(&'a [u8], Value<'a>), BufferError> + + Send + + Sync, + >, +} + +impl Debug for VBinDecoder { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + f.debug_struct("VBinDecoder") + .field("atom", &self.atom) + .field("alignment", &self.alignment) + .finish_non_exhaustive() + } +} + +impl PartialEq for VBinDecoder { + fn eq(&self, other: &Self) -> bool { + self.atom == other.atom && self.alignment == other.alignment + } +} + +impl Eq for VBinDecoder {} + +impl PartialOrd for VBinDecoder { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for VBinDecoder { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + (&self.atom, &self.alignment).cmp(&(&other.atom, &other.alignment)) + } +} diff --git a/tools/trace-parser/traceevent/src/cinterp.rs b/tools/trace-parser/traceevent/src/cinterp.rs new file mode 100644 index 0000000000..e1863d3088 --- /dev/null +++ b/tools/trace-parser/traceevent/src/cinterp.rs @@ -0,0 +1,2361 @@ +use core::{fmt, ops::Deref, num::Wrapping}; +use std::{string::String as StdString, sync::Arc}; + 
use bytemuck::cast_slice;
use thiserror::Error;

use crate::{
    array::Array,
    buffer::BufferError,
    cparser,
    cparser::{ArrayKind, Expr, ExtensionMacroKind, Type},
    header::{Abi, Address, Endianness, FileSize, Header, Identifier, LongSize, Signedness},
    print::{PrintError, PrintFmtError},
    scratch::{OwnedScratchBox, ScratchAlloc, ScratchBox},
    str::Str,
    error::convert_err_impl,
};

/// Errors raised while compiling a C expression AST into an evaluator.
#[derive(Error, Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum CompileError {
    // Fixed typo in the message: "Cannot this handle expression" ->
    // "Cannot handle this expression".
    #[error("Cannot handle this expression in its context: {0:?}")]
    ExprNotHandled(Expr),

    #[error("Cannot dereference an expression of type {0:?}: {1:?}")]
    CannotDeref(Type, Expr),

    #[error("Type is not an array: {0:?}")]
    NotAnArray(Type),

    #[error("Type not supported as array item: {0:?}")]
    InvalidArrayItem(Type),

    #[error("Size of this type is unknown: {0:?}")]
    UnknownSize(Type),

    #[error("Non arithmetic operand used with arithmetic operator")]
    NonArithmeticOperand(Type),

    #[error("Mismatching types in operands of {0:?}: {1:?} and {2:?}")]
    MismatchingOperandType(Expr, Type, Type),

    #[error("Cannot cast between incompatible pointer types: {0:?} => {1:?}")]
    IncompatiblePointerCast(Type, Type),

    #[error("The field \"{0}\" does not exist")]
    UnknownField(String),
}

/// Errors raised while evaluating a compiled expression against event data.
#[derive(Error, Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum EvalError {
    // The Option<Value<'static>> payload was restored from the stripped
    // "Option>" text: all constructing call sites pass
    // val.into_static().ok(), which has exactly that type.
    //
    // NOTE(review): the empty-string arm below likely carried a placeholder
    // such as "<unavailable>" that was lost in transcription — confirm
    // against upstream before relying on the rendered message.
    #[error("Illegal type of value: {}", match .0 {
        Some(val) => val.to_string(),
        None => "".into()
    })]
    IllegalType(Option<Value<'static>>),

    #[error("Cannot convert this value to a signed as it is too big: {0}")]
    CannotConvertToSigned(u64),

    // NOTE(review): same lost-placeholder remark as IllegalType.
    #[error("Attempted to index a scalar value: {}", match .0 {
        Some(val) => val.to_string(),
        None => "".into()
    })]
    CannotIndexScalar(Option<Value<'static>>),

    // Fixed typo in the message: "bonds" -> "bounds". The variant name is
    // kept unchanged since callers match on it.
    #[error("Array index out of bounds: {0}")]
    OutOfBondIndex(usize),

    #[error("Could not dereference address: {0}")]
    CannotDeref(Address),

    #[error("Event data not available")]
    NoEventData,

#[error("No header available")] + NoHeader, + + #[error("Error while evaluating extension macro call \"{}\": {}", .call, .error)] + ExtensionMacroError { + call: StdString, + error: StdString + }, + + #[error("Error while decoding buffer: {0}")] + BufferError(Box), + + #[error("Error while parsing a vbin buffer format: {0}")] + PrintFmtError(Box), + + #[error("Error while evaluating a vbin buffer: {0}")] + PrintError(Box), +} + +convert_err_impl!(BufferError, BufferError, EvalError); +convert_err_impl!(PrintFmtError, PrintFmtError, EvalError); +convert_err_impl!(PrintError, PrintError, EvalError); + +#[derive(Error, Debug, Clone, PartialEq, Eq)] +pub enum InterpError { + #[error("Could not compile: {0}")] + CompileError(Box), + #[error("Could not evaluate: {0}")] + EvalError(Box), +} +convert_err_impl!(EvalError, EvalError, InterpError); +convert_err_impl!(CompileError, CompileError, InterpError); + +#[derive(Clone, Debug, PartialEq, Eq)] +enum SockAddrFamily { + Ipv4, + Ipv6, +} + +impl SockAddrFamily { + #[inline] + fn from_raw(code: u16) -> Result { + match code { + 2 => Ok(SockAddrFamily::Ipv4), + 10 => Ok(SockAddrFamily::Ipv6), + _ => Err(BufferError::UnknownSockAddrFamily(code)), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum SockAddrKind { + Full, + Ipv4AddrOnly, + Ipv6AddrOnly, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SockAddr<'a> { + family: SockAddrFamily, + kind: SockAddrKind, + endianness: Endianness, + data: &'a [u8], +} + +#[derive(thiserror::Error, Debug, PartialEq, Eq, Clone)] +#[non_exhaustive] +pub enum SockAddrError { + #[error("Could not convert value")] + CannotConvert, +} + +macro_rules! 
get_array { + ($slice:expr, $len:expr) => {{ + let slice: &[u8] = $slice; + let slice = slice.get(..$len).ok_or(SockAddrError::CannotConvert)?; + let arr: [u8; $len] = slice.try_into().map_err(|_| SockAddrError::CannotConvert)?; + arr + }}; +} + +impl<'a> SockAddr<'a> { + #[inline] + pub fn from_bytes( + data: &'a [u8], + endianness: Endianness, + kind: SockAddrKind, + ) -> Result { + let family = match kind { + SockAddrKind::Full => { + let (_data, family) = endianness + .parse_u16(data) + .map_err(|_| BufferError::SockAddrTooSmall)?; + SockAddrFamily::from_raw(family) + } + SockAddrKind::Ipv4AddrOnly => Ok(SockAddrFamily::Ipv4), + SockAddrKind::Ipv6AddrOnly => Ok(SockAddrFamily::Ipv6), + }?; + + Ok(SockAddr { + family, + kind, + data, + endianness, + }) + } + + // Format of the structs described at: + // https://www.gnu.org/software/libc/manual/html_node/Internet-Address-Formats.html + // The order of struct members is different in the kernel struct. + + pub fn to_socketaddr(&self) -> Result { + match (&self.kind, &self.family) { + (SockAddrKind::Full, SockAddrFamily::Ipv4) => { + let port = u16::from_be_bytes(get_array!(&self.data[2..], 2)); + + // The kernel structs use network endianness but the user + // might pass a little endian buffer and ask for that + // explicitly. 
+ let (_, addr) = self + .endianness + .parse_u32(&self.data[4..]) + .map_err(|_| SockAddrError::CannotConvert)?; + + Ok(std::net::SocketAddr::V4(std::net::SocketAddrV4::new( + addr.into(), + port, + ))) + } + (SockAddrKind::Full, SockAddrFamily::Ipv6) => { + let port = u16::from_be_bytes(get_array!(&self.data[2..], 2)); + let flowinfo = u32::from_be_bytes(get_array!(&self.data[4..], 4)); + let addr = u128::from_be_bytes(get_array!(&self.data[8..], 16)); + let (_, scope_id) = self + .endianness + .parse_u32(&self.data[24..]) + .map_err(|_| SockAddrError::CannotConvert)?; + + Ok(std::net::SocketAddr::V6(std::net::SocketAddrV6::new( + addr.into(), + port, + flowinfo, + scope_id, + ))) + } + _ => Err(SockAddrError::CannotConvert), + } + } + + pub fn to_ipaddr(&self) -> Result { + match self.to_socketaddr() { + Ok(sockaddr) => Ok(sockaddr.ip()), + _ => match (&self.kind, &self.family) { + (SockAddrKind::Ipv4AddrOnly, SockAddrFamily::Ipv4) => { + // The kernel structs use network endianness but the user + // might pass a little endian buffer and ask for that + // explicitly. + let (_, addr) = self + .endianness + .parse_u32(self.data) + .map_err(|_| SockAddrError::CannotConvert)?; + let addr: std::net::Ipv4Addr = addr.into(); + Ok(addr.into()) + } + + (SockAddrKind::Ipv6AddrOnly, SockAddrFamily::Ipv6) => { + let data = get_array!(&self.data, 16); + // struct in6_addr is always encoded in big endian. 
The + // h/n/b/l printk specifiers are documented to be ignored + // for IPv6 + let addr = u128::from_be_bytes(data); + let addr: std::net::Ipv6Addr = addr.into(); + Ok(addr.into()) + } + _ => panic!("Inconsistent sockaddr kind and family"), + }, + } + } +} + +impl<'a> fmt::Display for SockAddr<'a> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + match self.to_socketaddr() { + Ok(addr) => fmt::Display::fmt(&addr, f), + Err(err) => write!(f, "ERROR<{err:?}>"), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Bitmap<'a> { + data: &'a [u8], + pub(crate) chunk_size: LongSize, + endianness: Endianness, +} + +impl<'a> Bitmap<'a> { + #[inline] + pub(crate) fn from_bytes<'abi>(data: &'a [u8], abi: &'abi Abi) -> Self { + let chunk_size: usize = abi.long_size.into(); + assert!(data.len() % chunk_size == 0); + Bitmap { + data, + chunk_size: abi.long_size, + endianness: abi.endianness, + } + } + + #[inline] + pub(crate) fn len(&self) -> usize { + self.data.len() + } +} + +impl<'a> fmt::Display for Bitmap<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + let mut range_start = None; + let mut prev = None; + let mut sep = ""; + + let mut print_range = |range_start, prev, sep| match range_start { + Some(range_start) if range_start == prev => { + write!(f, "{sep}{prev}") + } + None => write!(f, "{sep}{prev}"), + Some(range_start) => { + write!(f, "{sep}{range_start}-{prev}") + } + }; + + for curr in self { + match prev { + None => range_start = Some(curr), + Some(prev) => { + if curr != prev + 1 { + print_range(range_start, prev, sep)?; + sep = ","; + range_start = Some(curr); + } + } + }; + prev = Some(curr); + } + if let Some(prev) = prev { + print_range(range_start, prev, sep)? 
+ } + Ok(()) + } +} + +impl<'a> IntoIterator for &'a Bitmap<'a> { + type Item = as Iterator>::Item; + type IntoIter = BitmapIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + BitmapIterator { + bitmap: self, + curr_chunk: None, + next_chunk_index: 0, + bit_index: 0, + } + } +} + +pub struct BitmapIterator<'a> { + bitmap: &'a Bitmap<'a>, + + curr_chunk: Option, + next_chunk_index: usize, + bit_index: usize, +} + +impl<'a> BitmapIterator<'a> { + fn next_chunk(&mut self) -> Option { + let chunk_size = self.bitmap.chunk_size; + let chunk_usize: usize = chunk_size.into(); + let data = self.bitmap.data; + let base = self.next_chunk_index * chunk_usize; + if base < data.len() { + self.bit_index = 0; + self.next_chunk_index += 1; + let chunk = &data[base..base + chunk_usize]; + + Some(match chunk_size { + LongSize::Bits64 => { + let chunk = chunk.try_into().unwrap(); + match self.bitmap.endianness { + Endianness::Little => u64::from_le_bytes(chunk), + Endianness::Big => u64::from_be_bytes(chunk), + } + } + LongSize::Bits32 => { + let chunk = chunk.try_into().unwrap(); + match self.bitmap.endianness { + Endianness::Little => u32::from_le_bytes(chunk) as u64, + Endianness::Big => u32::from_be_bytes(chunk) as u64, + } + } + }) + } else { + None + } + } + + #[inline] + pub fn as_chunks(&'a mut self) -> impl Iterator + 'a { + core::iter::from_fn(move || self.next_chunk()) + } + + #[inline] + pub fn as_bytes(&'a mut self) -> impl Iterator + 'a { + let mut curr = None; + let max_idx = match self.bitmap.chunk_size { + LongSize::Bits32 => 4, + LongSize::Bits64 => 8, + }; + let mut idx = max_idx; + core::iter::from_fn(move || { + if idx == max_idx { + curr = self.next_chunk(); + idx = 0; + } + match curr { + None => None, + Some(chunk) => { + let x = (0xFF & (chunk >> (idx * 8))) as u8; + idx += 1; + Some(x) + } + } + }) + } + + #[inline] + pub fn as_bits(&'a mut self) -> impl Iterator + 'a { + let mut curr = None; + let max_idx = match self.bitmap.chunk_size { + 
LongSize::Bits32 => 32, + LongSize::Bits64 => 64, + }; + let mut idx = max_idx; + core::iter::from_fn(move || { + if idx == max_idx { + curr = self.next_chunk(); + idx = 0; + } + match curr { + None => None, + Some(chunk) => { + let x = (0b1 & (chunk >> idx)) != 0; + idx += 1; + Some(x) + } + } + }) + } +} + +impl<'a> Iterator for BitmapIterator<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + let chunk_size: usize = self.bitmap.chunk_size.into(); + loop { + match self.curr_chunk { + Some(chunk) => { + if self.bit_index < chunk_size - 1 { + let bit_index = self.bit_index; + self.bit_index += 1; + + let is_set = (chunk & (1 << bit_index)) != 0; + if is_set { + let global_index = bit_index + (self.next_chunk_index - 1) * chunk_size; + break Some(global_index); + } + } else { + self.curr_chunk = Some(self.next_chunk()?); + } + } + None => { + self.curr_chunk = Some(self.next_chunk()?); + } + } + } + } +} + +// Pointers fall into 3 categories: +// 1. Pointers to unknown values. They are represented by an Value::U64Scalar at +// runtime. +// 2. Pointers to known arrays, e.g. char* pointing at string constants. They +// are represented by Value::XXArray at runtime. +// 3. Pointers to known scalar values: this does not happen often in trace.dat. +// They are only currently supported when appearing in "*&x" that gets +// simplified into "x", or as &x (evaluates to a symbolic address). + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Value<'a> { + U64Scalar(u64), + I64Scalar(i64), + + // Similar to U8Array but will act as a null-terminated string, and is + // guaranteed to be utf-8 encoded. 
+ Str(Str<'a>), + + U8Array(Array<'a, u8>), + I8Array(Array<'a, i8>), + + U16Array(Array<'a, u16>), + I16Array(Array<'a, i16>), + + U32Array(Array<'a, u32>), + I32Array(Array<'a, i32>), + + U64Array(Array<'a, u64>), + I64Array(Array<'a, i64>), + + // Variable, usually REC, and usually erased at compile time when it appears + // in the pattern REC->foobar + Variable(Identifier), + + // Symbolic address of a value + Addr(ScratchBox<'a, Value<'a>>), + + // Kernel bitmap, such as cpumask_t + Bitmap(Bitmap<'a>), + + // Kernel struct sockaddr. We don't use std::net::SockAddr as we need to be + // able to represent any socket type the kernel can handle, which goes + // beyond IP. Also, we want a zero-copy value. + SockAddr(SockAddr<'a>), + + // Used for runtime decoding of event field values. + Raw(Arc, Array<'a, u8>), + Unknown, +} + +impl<'a> Value<'a> { + #[inline] + pub fn deref_ptr<'ee, EE>(&'a self, env: &'ee EE) -> Result, EvalError> + where + 'ee: 'a, + EE: EvalEnv<'ee> + ?Sized, + { + match self { + Value::Addr(sbox) => Ok(sbox.deref().clone()), + Value::Str(s) => Ok(Value::Str(Str::new_borrowed(s))), + + Value::U64Scalar(addr) => env.deref_static(*addr), + Value::I64Scalar(addr) => env.deref_static(*addr as u64), + + Value::U8Array(arr) => Ok(Value::U8Array(Array::Borrowed(arr))), + Value::I8Array(arr) => Ok(Value::I8Array(Array::Borrowed(arr))), + + Value::U16Array(arr) => Ok(Value::U16Array(Array::Borrowed(arr))), + Value::I16Array(arr) => Ok(Value::I16Array(Array::Borrowed(arr))), + + Value::U32Array(arr) => Ok(Value::U32Array(Array::Borrowed(arr))), + Value::I32Array(arr) => Ok(Value::I32Array(Array::Borrowed(arr))), + + Value::U64Array(arr) => Ok(Value::U64Array(Array::Borrowed(arr))), + Value::I64Array(arr) => Ok(Value::I64Array(Array::Borrowed(arr))), + val => Err(EvalError::IllegalType(val.clone().into_static().ok())), + } + } + + pub fn to_bytes(&self) -> Option + '_> { + use Value::*; + + let (add_null, slice) = match self { + Str(s) => Some((true, 
s.as_bytes())), + Raw(_, arr) => Some((false, arr.deref())), + + U8Array(arr) => Some((false, arr.deref())), + I8Array(arr) => Some((false, cast_slice(arr))), + + U16Array(arr) => Some((false, cast_slice(arr))), + I16Array(arr) => Some((false, cast_slice(arr))), + + U32Array(arr) => Some((false, cast_slice(arr))), + I32Array(arr) => Some((false, cast_slice(arr))), + + U64Array(arr) => Some((false, cast_slice(arr))), + I64Array(arr) => Some((false, cast_slice(arr))), + _ => None, + }?; + let mut iter = slice.iter(); + Some(core::iter::from_fn(move || match iter.next().copied() { + Some(x) => Some(x), + None if add_null => Some(0), + _ => None, + })) + } + + pub fn to_str(&self) -> Option<&str> { + macro_rules! from_array { + ($s:expr) => { + if let Some(s) = $s.split(|c| *c == 0).next() { + if let Ok(s) = std::str::from_utf8(s) { + Some(s) + } else { + None + } + } else { + None + } + }; + } + match self { + Value::U8Array(s) => from_array!(s), + Value::I8Array(s) => from_array!(cast_slice(s)), + Value::Str(s) => Some(s), + _ => None, + } + } + + pub fn into_static(self) -> Result, Value<'a>> { + use Value::*; + + macro_rules! 
array { + ($variant:ident, $arr:expr) => { + Ok($variant($arr.into_static())) + }; + } + match self { + U64Scalar(x) => Ok(U64Scalar(x)), + I64Scalar(x) => Ok(I64Scalar(x)), + + Str(s) => Ok(Str(s.into_static())), + + U8Array(arr) => array!(U8Array, arr), + I8Array(arr) => array!(I8Array, arr), + + U16Array(arr) => array!(U16Array, arr), + I16Array(arr) => array!(I16Array, arr), + + U32Array(arr) => array!(U32Array, arr), + I32Array(arr) => array!(I32Array, arr), + + U64Array(arr) => array!(U64Array, arr), + I64Array(arr) => array!(I64Array, arr), + + Raw(typ, arr) => Ok(Raw(typ, arr.into_static())), + Addr(addr) => { + let addr = addr.deref().clone(); + let addr = addr.into_static()?; + Ok(Addr(ScratchBox::Arc(Arc::new(addr)))) + } + Variable(id) => Ok(Variable(id)), + Unknown => Ok(Unknown), + + // The only bitmaps that exist are created by the kernel and stored + // in a field, they are never synthesized by any expression that + // could be evaluated ahead of time. + bitmap @ Bitmap(_) => Err(bitmap), + sockaddr @ SockAddr(_) => Err(sockaddr), + } + } + + fn get + ?Sized>( + self, + env: &'a EE, + i: usize, + ) -> Result, EvalError> { + + let (derefed, val) = match self { + Value::U64Scalar(addr) => (true, env.deref_static(addr)?), + Value::I64Scalar(addr) => (true, env.deref_static(addr as u64)?), + Value::Addr(val) => (true, val.into_inner()), + val => (false, val), + }; + + macro_rules! 
match_ { + ($(($array_ctor:tt, $scalar_ctor:tt)),*) => { + match val { + $( + Value::$array_ctor(vec) => { + match vec.deref().get(i) { + None => Err(EvalError::OutOfBondIndex(i)), + Some(x) => Ok(Value::$scalar_ctor(x.clone().into())) + } + } + ),* + Value::Str(s) => { + match s.as_bytes().get(i) { + None => { + if i == s.len() { + Ok(Value::U64Scalar(0)) + } else { + Err(EvalError::OutOfBondIndex(i)) + } + } + Some(c) => Ok(Value::U64Scalar((*c).into())), + } + } + val if derefed && i == 0 => Ok(val), + val => Err(EvalError::CannotIndexScalar(val.into_static().ok())) + } + } + } + match_! { + (I8Array, I64Scalar), + (U8Array, U64Scalar), + + (I16Array, I64Scalar), + (U16Array, U64Scalar), + + (I32Array, I64Scalar), + (U32Array, U64Scalar), + + (I64Array, I64Scalar), + (U64Array, U64Scalar) + } + } +} + +impl<'a> fmt::Display for Value<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + macro_rules! display { + ($x:expr) => {{ + fmt::Display::fmt(&$x, f) + }}; + } + + match self { + Value::U64Scalar(x) => display!(x), + Value::I64Scalar(x) => display!(x), + Value::Str(x) => display!(x), + Value::U8Array(x) => display!(x), + Value::I8Array(x) => display!(x), + Value::U16Array(x) => display!(x), + Value::I16Array(x) => display!(x), + Value::U32Array(x) => display!(x), + Value::I32Array(x) => display!(x), + Value::U64Array(x) => display!(x), + Value::I64Array(x) => display!(x), + Value::Variable(x) => display!(x), + Value::Addr(x) => write!(f, "
>", x.deref()), + Value::Bitmap(x) => display!(x), + Value::SockAddr(x) => display!(x), + Value::Raw(typ, data) => { + write!(f, "")?; + Ok(()) + } + Value::Unknown => write!(f, ""), + } + } +} + +pub trait CompileEnv<'ce>: EvalEnv<'ce> +where + Self: 'ce, +{ + fn field_typ(&self, id: &str) -> Result; + fn field_getter(&self, id: &str) -> Result, CompileError>; +} + +pub trait EvalEnv<'ee> +where + Self: 'ee + Send + Sync, +{ + // fn field_getter(&self, id: &str) -> Result Result>, CompileError>; + fn deref_static(&self, _addr: u64) -> Result, EvalError>; + fn event_data(&self) -> Result<&[u8], EvalError>; + + fn scratch(&self) -> &ScratchAlloc; + + fn header(&self) -> Result<&Header, EvalError>; +} + +impl<'ee, 'eeref> EvalEnv<'eeref> for &'eeref (dyn CompileEnv<'ee> + 'ee) { + #[inline] + fn deref_static(&self, addr: u64) -> Result, EvalError> { + (*self).deref_static(addr) + } + + #[inline] + fn event_data(&self) -> Result<&[u8], EvalError> { + (*self).event_data() + } + + #[inline] + fn scratch(&self) -> &ScratchAlloc { + (*self).scratch() + } + + #[inline] + fn header(&self) -> Result<&Header, EvalError> { + (*self).header() + } +} + +impl<'ce, 'ceref> CompileEnv<'ceref> for &'ceref (dyn CompileEnv<'ce> + 'ce) { + fn field_typ(&self, id: &str) -> Result { + (*self).field_typ(id) + } + fn field_getter(&self, id: &str) -> Result, CompileError> { + (*self).field_getter(id) + } +} + +pub struct EmptyEnv { + scratch: ScratchAlloc, +} + +impl<'ee> EvalEnv<'ee> for EmptyEnv { + #[inline] + fn scratch(&self) -> &ScratchAlloc { + &self.scratch + } + + fn header(&self) -> Result<&Header, EvalError> { + Err(EvalError::NoHeader) + } + + fn deref_static(&self, addr: Address) -> Result, EvalError> { + Err(EvalError::CannotDeref(addr)) + } + fn event_data(&self) -> Result<&[u8], EvalError> { + Err(EvalError::NoEventData) + } +} + +impl Default for EmptyEnv { + fn default() -> Self { + Self::new() + } +} + +impl EmptyEnv { + pub fn new() -> Self { + EmptyEnv { + scratch: 
ScratchAlloc::new(), + } + } +} + +impl<'ce> CompileEnv<'ce> for EmptyEnv { + #[inline] + fn field_typ(&self, _id: &str) -> Result { + Ok(Type::Unknown) + } + + #[inline] + fn field_getter(&self, id: &str) -> Result, CompileError> { + Err(CompileError::UnknownField(id.into())) + } +} + +pub struct BufferEnv<'a> { + scratch: &'a ScratchAlloc, + header: &'a Header, + data: &'a [u8], +} +impl<'a> BufferEnv<'a> { + pub fn new(scratch: &'a ScratchAlloc, header: &'a Header, data: &'a [u8]) -> Self { + BufferEnv { + scratch, + header, + data, + } + } +} + +impl<'ee> EvalEnv<'ee> for BufferEnv<'ee> { + #[inline] + fn scratch(&self) -> &ScratchAlloc { + self.scratch + } + + #[inline] + fn deref_static(&self, addr: u64) -> Result, EvalError> { + self.header.deref_static(addr) + } + + fn event_data(&self) -> Result<&[u8], EvalError> { + Ok(self.data) + } + + fn header(&self) -> Result<&Header, EvalError> { + Ok(self.header) + } +} + +pub struct ArithInfo<'a> { + typ: &'a Type, + rank: u32, + width: FileSize, + signed: Type, + unsigned: Type, +} + +impl<'a> ArithInfo<'a> { + #[inline] + pub fn is_signed(&self) -> bool { + self.typ == &self.signed + } + + #[inline] + pub fn signedness(&self) -> Signedness { + if self.is_signed() { + Signedness::Signed + } else { + Signedness::Unsigned + } + } +} + +impl Type { + #[inline] + pub fn is_arith(&self) -> bool { + self.arith_info().is_some() + } + + pub fn arith_info(&self) -> Option { + let typ = self.resolve_wrapper(); + + use Type::*; + match typ { + U8 | I8 | Bool => Some(ArithInfo { + typ, + rank: 0, + signed: I8, + unsigned: U8, + width: 8, + }), + U16 | I16 => Some(ArithInfo { + typ, + rank: 1, + signed: I16, + unsigned: U16, + width: 16, + }), + U32 | I32 => Some(ArithInfo { + typ, + rank: 2, + signed: I32, + unsigned: U32, + width: 32, + }), + U64 | I64 => Some(ArithInfo { + typ, + rank: 3, + signed: I64, + unsigned: U64, + width: 64, + }), + _ => None, + } + } + + pub fn promote(self) -> Type { + match self.arith_info() { + 
Some(info) => { + if info.width <= 32 { + if info.is_signed() { + Type::I32 + } else { + Type::U32 + } + } else { + self + } + } + None => self, + } + } + + pub fn resolve_wrapper(&self) -> &Self { + match self { + Type::Typedef(typ, _) | Type::Enum(typ, _) => typ.resolve_wrapper(), + _ => self, + } + } + + pub fn decay_to_ptr(self) -> Type { + match self { + Type::Array(typ, ..) => Type::Pointer(typ), + typ => typ, + } + } +} + +type ArithConverter = dyn for<'a> Fn(Value<'a>) -> Result, EvalError> + Send + Sync; + +#[inline] +fn convert_arith(dst: &Type) -> Result, CompileError> { + macro_rules! convert { + ($typ:ty, $ctor:ident) => { + Ok(Box::new(|x| { + let x = match x { + Value::I64Scalar(x) => x as $typ, + Value::U64Scalar(x) => x as $typ, + val => return Err(EvalError::IllegalType(val.into_static().ok())), + }; + Ok(Value::$ctor(x.into())) + })) + }; + } + + use Type::*; + match dst.resolve_wrapper() { + Bool => convert!(u8, U64Scalar), + I8 => convert!(i8, I64Scalar), + U8 => convert!(u8, U64Scalar), + I16 => convert!(i16, I64Scalar), + U16 => convert!(u16, U64Scalar), + I32 => convert!(i32, I64Scalar), + U32 => convert!(u32, U64Scalar), + I64 => convert!(i64, I64Scalar), + U64 => convert!(u64, U64Scalar), + typ => Err(CompileError::NonArithmeticOperand(typ.clone())), + } +} + +fn usual_arith_conv(lhs: Type, rhs: Type) -> Result { + let lhs = lhs.promote(); + let rhs = rhs.promote(); + + match (lhs.arith_info(), rhs.arith_info()) { + (Some(lhs_info), Some(rhs_info)) => Ok({ + if lhs == rhs { + lhs + } else if lhs_info.is_signed() == rhs_info.is_signed() { + if lhs_info.rank > rhs_info.rank { + lhs + } else { + rhs + } + } else { + let (styp, styp_info, utyp, utyp_info) = if lhs_info.is_signed() { + (&lhs, lhs_info, &rhs, rhs_info) + } else { + (&rhs, rhs_info, &lhs, lhs_info) + }; + + if utyp_info.rank >= styp_info.rank { + utyp.clone() + } else if styp_info.width - 1 >= utyp_info.width { + styp.clone() + } else { + styp_info.unsigned + } + } + }), + (None, 
_) => Err(CompileError::NonArithmeticOperand(lhs)), + (_, None) => Err(CompileError::NonArithmeticOperand(rhs)), + } +} + +#[inline] +fn convert_arith_ops( + _abi: &Abi, + lhs: Type, + rhs: Type, +) -> Result<(Type, Box, Box), CompileError> { + let typ = usual_arith_conv(lhs.clone(), rhs.clone())?; + Ok((typ.clone(), convert_arith(&typ)?, convert_arith(&typ)?)) +} + +#[inline] +fn convert_arith_op<'ce, CE>( + abi: &Abi, + cenv: &CE, + expr: &Expr, +) -> Result<(Type, Box), CompileError> +where + CE: CompileEnv<'ce>, +{ + let typ = expr.typ(cenv, abi)?; + let promoted = typ.promote(); + Ok((promoted.clone(), convert_arith(&promoted)?)) +} + +impl Type { + pub fn size(&self, abi: &Abi) -> Result { + let typ = self.resolve_wrapper(); + use Type::*; + match typ { + Pointer(_) => Ok(abi.long_size.into()), + Array(typ, size) => match size { + ArrayKind::Fixed(Ok(size)) => { + let item = typ.size(abi)?; + Ok(size * item) + } + _ => Err(CompileError::UnknownSize(*typ.clone())), + }, + _ => { + let info = typ + .arith_info() + .ok_or_else(|| CompileError::UnknownSize(typ.clone()))?; + Ok(info.width / 8) + } + } + } + + fn to_arith(&self, abi: &Abi) -> Result { + match self.resolve_wrapper() { + Type::Pointer(_) | Type::Array(..) 
=> match abi.long_size { + LongSize::Bits32 => Ok(Type::U32), + LongSize::Bits64 => Ok(Type::U64), + }, + typ => { + // Check it's an arithmetic type + typ.arith_info() + .ok_or_else(|| CompileError::NonArithmeticOperand(typ.clone()))?; + Ok(typ.clone()) + } + } + } +} + +impl Expr { + pub fn typ<'ce, CE>(&self, cenv: &CE, abi: &Abi) -> Result + where + CE: CompileEnv<'ce>, + { + use Expr::*; + + let recurse = |expr: &Expr| expr.typ(cenv, abi); + + match self { + Evaluated(typ, _) => Ok(typ.clone()), + Uninit => Ok(Type::Unknown), + Variable(typ, _id) => Ok(typ.clone()), + + InitializerList(_) => Ok(Type::Unknown), + DesignatedInitializer(_, init) => recurse(init), + CompoundLiteral(typ, _) => Ok(typ.clone()), + + IntConstant(typ, _) | CharConstant(typ, _) | EnumConstant(typ, _) => Ok(typ.clone()), + StringLiteral(str) => { + let len: u64 = str.len().try_into().unwrap(); + // null terminator + let len = len + 1; + Ok(Type::Array( + Box::new(abi.char_typ()), + ArrayKind::Fixed(Ok(len)), + )) + } + + Addr(expr) => Ok(Type::Pointer(Box::new(recurse(expr)?))), + Deref(expr) => match recurse(expr)?.resolve_wrapper() { + Type::Pointer(typ) | Type::Array(typ, _) => Ok(*typ.clone()), + typ => Err(CompileError::CannotDeref(typ.clone(), *expr.clone())), + }, + Plus(expr) | Minus(expr) | Tilde(expr) => Ok(recurse(expr)?.promote()), + Bang(_) => Ok(Type::I32), + Cast(typ, _) => Ok(typ.clone()), + SizeofType(..) | SizeofExpr(_) => Ok(match &abi.long_size { + LongSize::Bits32 => Type::U32, + LongSize::Bits64 => Type::U64, + }), + PreInc(expr) | PreDec(expr) | PostInc(expr) | PostDec(expr) => recurse(expr), + + MemberAccess(expr, member) => match recurse(expr)?.resolve_wrapper() { + Type::Variable(id) if id == "REC" => Ok(cenv.field_typ(member)?), + _ => Ok(Type::Unknown), + }, + FuncCall(..) 
=> Ok(Type::Unknown), + Subscript(expr, idx) => { + let idx = recurse(idx)?; + match idx.arith_info() { + Some(info) => Ok(info), + None => Err(CompileError::NonArithmeticOperand(idx)) + }?; + + match recurse(expr)?.resolve_wrapper() { + Type::Array(typ, _) | Type::Pointer(typ) => Ok(*typ.clone()), + typ => Err(CompileError::NotAnArray(typ.clone())), + } + } + + Assign(_lhs, rhs) => recurse(rhs), + + Eq(..) | NEq(..) | LoEq(..) | HiEq(..) | Hi(..) | Lo(..) | And(..) | Or(..) => { + Ok(Type::I32) + } + + LShift(expr, _) | RShift(expr, _) => Ok(recurse(expr)?.promote()), + + Mul(lhs, rhs) + | Div(lhs, rhs) + | Mod(lhs, rhs) + | Add(lhs, rhs) + | Sub(lhs, rhs) + | BitAnd(lhs, rhs) + | BitOr(lhs, rhs) + | BitXor(lhs, rhs) => { + let lhs = recurse(lhs)?.resolve_wrapper().clone().decay_to_ptr(); + let rhs = recurse(rhs)?.resolve_wrapper().clone().decay_to_ptr(); + match (&lhs, &rhs) { + (Type::Pointer(_lhs), _rhs) if _rhs.is_arith() => Ok(lhs), + (_lhs, Type::Pointer(_rhs)) if _lhs.is_arith() => Ok(rhs), + _ => usual_arith_conv(lhs, rhs) + } + } + Ternary(_, lhs, rhs) => { + let lhstyp = recurse(lhs)?.resolve_wrapper().clone().decay_to_ptr(); + let rhstyp = recurse(rhs)?.resolve_wrapper().clone().decay_to_ptr(); + + fn is_null_ptr_cst(expr: &Expr) -> bool { + match expr { + Expr::IntConstant(_, 0) => true, + Expr::Cast(Pointer(pointee), expr) if pointee.deref() == &Void => is_null_ptr_cst(expr), + _ => false, + } + } + + use Type::*; + match usual_arith_conv(lhstyp.clone(), rhstyp.clone()) { + Ok(typ) => Ok(typ), + Err(_) => match (&lhstyp, &rhstyp) { + (lhstyp_, rhstyp_) if lhstyp_ == rhstyp_ => Ok(lhstyp), + (Pointer(_), _) if is_null_ptr_cst(rhs) => Ok(lhstyp), + (_, Pointer(_)) if is_null_ptr_cst(lhs) => Ok(rhstyp), + (Pointer(inner), Pointer(_)) | (Pointer(_), Pointer(inner)) if inner.deref() == &Void => Ok(Pointer(Box::new(Void))), + (Pointer(lhstyp_), Pointer(rhstyp_)) + if lhstyp_.resolve_wrapper() == rhstyp_.resolve_wrapper() => + { + 
Ok(Pointer(Box::new(lhstyp))) + } + _ => Err(CompileError::MismatchingOperandType(self.clone(), lhstyp, rhstyp)), + }, + } + } + CommaExpr(exprs) => recurse(exprs.last().unwrap()), + + ExtensionMacro(desc) => match &desc.kind { + ExtensionMacroKind::ObjectLike { typ, .. } => Ok(typ.clone()), + ExtensionMacroKind::FunctionLike { .. } => Ok(Type::Unknown), + }, + ExtensionMacroCall(cparser::ExtensionMacroCall { compiler, .. }) => { + compiler.ret_typ.typ(cenv, abi) + } + } + } +} + +pub trait Evaluator: Send + Sync { + fn eval<'eeref, 'ee>( + &self, + env: &'eeref (dyn EvalEnv<'ee> + 'eeref), + ) -> Result, EvalError>; +} + +impl Evaluator for F +where + F: for<'ee, 'eeref> Fn(&'eeref (dyn EvalEnv<'ee> + 'eeref)) -> Result, EvalError> + + Send + + Sync, +{ + fn eval<'eeref, 'ee>( + &self, + env: &'eeref (dyn EvalEnv<'ee> + 'eeref), + ) -> Result, EvalError> { + self(env) + } +} + +// TODO: the day Rust infers correctly HRTB for closures, this won't be necessary anymore +#[inline] +pub fn new_dyn_evaluator(f: F) -> Box +where + F: for<'ee, 'eeref> Fn(&'eeref (dyn EvalEnv<'ee> + 'eeref)) -> Result, EvalError> + + Send + + Sync + + 'static, +{ + Box::new(f) +} + +impl Expr { + pub fn eval_const(self, abi: &Abi, f: F) -> T + where + F: for<'a> FnOnce(Result, InterpError>) -> T, + { + let env = EmptyEnv::new(); + let eval = || -> Result<_, InterpError> { + let eval = self.compile(&env, abi)?; + Ok(eval.eval(&env)?) 
+ }; + f(eval()) + } + + pub fn simplify<'ce, CE>(self, cenv: &'ce CE, abi: &Abi) -> Expr + where + CE: CompileEnv<'ce>, + { + let compiled = self.clone().compile(cenv, abi); + self._do_simplify(cenv, abi, compiled) + } + + fn _simplify<'ce, CE>(self, cenv: &'ce CE, abi: &Abi) -> Expr + where + CE: CompileEnv<'ce>, + { + let compiled = self.clone()._compile(cenv, abi); + self._do_simplify(cenv, abi, compiled) + } + + fn _do_simplify<'ce, CE>( + self, + cenv: &'ce CE, + abi: &Abi, + compiled: Result, CompileError>, + ) -> Expr + where + CE: CompileEnv<'ce>, + { + match compiled { + Ok(eval) => match self.typ(cenv, abi) { + Ok(typ) => match eval.eval(cenv) { + Ok(value) => match value.into_static() { + Ok(value) => Expr::Evaluated(typ, value), + Err(_) => self, + }, + Err(_) => self, + }, + Err(_) => self, + }, + Err(_) => self, + } + } + + pub fn compile<'ce, CE>( + self, + cenv: &'ce CE, + abi: &Abi, + ) -> Result, CompileError> + where + CE: CompileEnv<'ce>, + { + // Type check the AST. This should be done only once on the root node, so any recursive + // compilation invocations are done via _compile() to avoid re-doing it and avoid an O(N^2) + // complexity + self.typ(cenv, abi)?; + self._compile(cenv, abi) + } + + fn _compile<'ce, CE>(self, cenv: &'ce CE, abi: &Abi) -> Result, CompileError> + where + CE: CompileEnv<'ce>, + { + use Expr::*; + let cannot_handle = |expr| Err(CompileError::ExprNotHandled(expr)); + let recurse = |expr: Expr| expr._compile(cenv, abi); + let simplify = |expr: Expr| expr._simplify(cenv, abi); + let uintptr_t = abi.ulong_typ(); + + fn to_signed(x: u64) -> Result { + x.try_into() + .map_err(|_| EvalError::CannotConvertToSigned(x)) + } + + fn multiply_by_pointee_size(pointee: Type, expr: Expr) -> Expr { + match pointee { + Type::Void | Type::U8 | Type::I8 => expr, + _ => Mul(Box::new(expr), Box::new(SizeofType(pointee))), + } + } + + macro_rules! 
binop { + ($ctor:expr, $lhs:expr, $rhs:expr, $op:expr) => {{ + let lhs = *$lhs; + let rhs = *$rhs; + let eval_lhs = recurse(lhs.clone())?; + let eval_rhs = recurse(rhs.clone())?; + + let lhstyp = lhs.typ(cenv, abi)?; + let rhstyp = rhs.typ(cenv, abi)?; + + let lhsisarith = lhstyp.is_arith(); + let rhsisarith = rhstyp.is_arith(); + + let ctor: Option, Box) -> Expr> = $ctor; + match (lhstyp, lhsisarith, rhstyp, rhsisarith, ctor) { + // Binary operation between two arithmetic types + (lhs, true, rhs, true, _) => { + let (_typ, conv_lhs, conv_rhs) = convert_arith_ops(abi, lhs, rhs)?; + + Ok(new_dyn_evaluator(move |env| { + let lhs = conv_lhs(eval_lhs.eval(env)?)?; + let rhs = conv_rhs(eval_rhs.eval(env)?)?; + + match (lhs, rhs) { + (Value::U64Scalar(x), Value::U64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::U64Scalar($op(Wrapping(x), Wrapping(y)).0)) + } + (Value::I64Scalar(x), Value::I64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::I64Scalar($op(Wrapping(x), Wrapping(y)).0)) + } + (val, _) => Err(EvalError::IllegalType(val.into_static().ok())), + } + })) + } + // Pointer arithmetic + (Type::Pointer(_lhstyp), _, _rhstyp, true, Some(ctor))=> { + // Convert "A op B" where type(A) = Pointer(Ta) and type(B) is arithmetic + // into: + // (Ta*)((u64)A + B * sizeof(TA)) + recurse( + Cast( + Type::Pointer(_lhstyp.clone()), + Box::new(ctor( + Box::new(Cast(uintptr_t.clone(), Box::new(lhs))), + Box::new(multiply_by_pointee_size(*_lhstyp, rhs)), + )) + ) + ) + } + + (_lhstyp, true, Type::Pointer(_rhstyp), _, Some(ctor)) => { + recurse( + Cast( + Type::Pointer(_rhstyp.clone()), + Box::new(ctor( + Box::new(multiply_by_pointee_size(*_rhstyp, lhs)), + Box::new(Cast(uintptr_t.clone(), Box::new(rhs))), + )) + ) + ) + } + (lhs, false, _, _, _) => Err(CompileError::NonArithmeticOperand(lhs)), + (_, _, rhs, false, _) => Err(CompileError::NonArithmeticOperand(rhs)), + } + }}; + } + + macro_rules! 
comp { + ($lhs:expr, $rhs:expr, $op:expr) => {{ + let lhs = *$lhs; + let rhs = *$rhs; + let eval_lhs = recurse(lhs.clone())?; + let eval_rhs = recurse(rhs.clone())?; + + let lhs = lhs.typ(cenv, abi)?.to_arith(abi)?; + let rhs = rhs.typ(cenv, abi)?.to_arith(abi)?; + + let (_typ, conv_lhs, conv_rhs) = convert_arith_ops(abi, lhs, rhs)?; + + Ok(new_dyn_evaluator(move |env| { + let lhs = conv_lhs(eval_lhs.eval(env)?)?; + let rhs = conv_rhs(eval_rhs.eval(env)?)?; + + match (lhs, rhs) { + (Value::U64Scalar(x), Value::U64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::I64Scalar($op(x, y))) + } + (Value::I64Scalar(x), Value::I64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::I64Scalar($op(x, y))) + } + (val, _) => Err(EvalError::IllegalType(val.into_static().ok())), + } + })) + }}; + } + + macro_rules! shift { + ($lhs:expr, $rhs:expr, $op:expr) => {{ + let lhs = *$lhs; + let rhs = *$rhs; + let eval_lhs = recurse(lhs.clone())?; + let eval_rhs = recurse(rhs.clone())?; + + let (_typ, conv_lhs) = convert_arith_op(abi, cenv, &lhs)?; + let (_, conv_rhs) = convert_arith_op(abi, cenv, &rhs)?; + + Ok(new_dyn_evaluator(move |env| { + let lhs = conv_lhs(eval_lhs.eval(env)?)?; + let rhs = conv_rhs(eval_rhs.eval(env)?)?; + + match (lhs, rhs) { + (Value::U64Scalar(x), Value::U64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::U64Scalar($op(x, y))) + } + (Value::U64Scalar(x), Value::I64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::U64Scalar($op(x, y))) + } + + (Value::I64Scalar(x), Value::U64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::I64Scalar($op(x, y))) + } + (Value::I64Scalar(x), Value::I64Scalar(y)) => + { + #[allow(clippy::redundant_closure_call)] + Ok(Value::I64Scalar($op(x, y))) + } + (val, _) => Err(EvalError::IllegalType(val.into_static().ok())), + } + })) + }}; + } + + let eval = match self { + Evaluated(_typ, value) => Ok(new_dyn_evaluator(move |_| 
Ok(value.clone()))), + Variable(_typ, id) => Ok(new_dyn_evaluator(move |_| Ok(Value::Variable(id.clone())))), + + MemberAccess(expr, member) => { + let expr = simplify(*expr); + match &expr { + Variable(_, id) | Evaluated(_, Value::Variable(id)) if id == "REC" => { + cenv.field_getter(&member) + } + _ => cannot_handle(expr), + } + } + + expr @ (Uninit + | InitializerList(_) + | DesignatedInitializer(..) + | CompoundLiteral(..)) => cannot_handle(expr), + IntConstant(typ, x) | CharConstant(typ, x) => { + let typ = typ.to_arith(abi)?; + let info = match typ.arith_info() { + Some(info) => Ok(info), + None => Err(CompileError::NonArithmeticOperand(typ)), + }?; + Ok(if info.is_signed() { + new_dyn_evaluator(move |_| Ok(Value::I64Scalar(to_signed(x)?))) + } else { + new_dyn_evaluator(move |_| Ok(Value::U64Scalar(x))) + }) + } + StringLiteral(s) => { + let s: Arc = Arc::from(s.as_ref()); + Ok(new_dyn_evaluator(move |_| { + Ok(Value::Str(Str::new_arc(Arc::clone(&s)))) + })) + } + expr @ EnumConstant(..) 
=> cannot_handle(expr), + SizeofType(typ) => { + let size = Ok(Value::U64Scalar(typ.size(abi)?)); + Ok(new_dyn_evaluator(move |_| size.clone())) + } + SizeofExpr(expr) => { + let typ = expr.typ(cenv, abi)?; + recurse(SizeofType(typ)) + } + Cast(typ, expr) => { + let expr = *expr; + let typ = typ.decay_to_ptr(); + let expr_typ = expr.typ(cenv, abi)?.decay_to_ptr(); + + let expr_typ: &Type = expr_typ.resolve_wrapper(); + let typ: &Type = typ.resolve_wrapper(); + + match (typ, expr) { + (typ, expr) if typ == expr_typ => recurse(expr), + // Chains of cast (T1*)(T2*)...x is equivalent to (T1*)x + (Type::Pointer(_), Cast(Type::Pointer(_), expr)) => { + recurse(Cast(typ.clone(), expr)) + } + (typ, expr) => match (typ, expr_typ) { + ( + Type::Pointer(typ), + Type::Pointer(expr_typ) | Type::Array(expr_typ, _), + ) => { + let typ: &Type = typ.deref().resolve_wrapper(); + let expr_typ = expr_typ.resolve_wrapper(); + match (expr_typ, typ) { + (expr_typ, typ) if typ == expr_typ => recurse(expr), + // (void *)(T *)x is treated the same as (T *)x + (_, Type::Void) => recurse(expr), + + // For integer types: + // T x; + // (T2*)&x == &(T2)x + // Note that this is only well defined if T2 is char in + // first approximation + ( + Type::Bool + | Type::U8 + | Type::I8 + | Type::U16 + | Type::I16 + | Type::U32 + | Type::I32 + | Type::I64 + | Type::U64, + typ, + ) if typ == &Type::Bool || typ == &Type::U8 || typ == &Type::I8 => { + recurse(Addr(Box::new(Cast( + typ.clone(), + Box::new(Deref(Box::new(expr))), + )))) + } + (expr_typ, typ) => Err(CompileError::IncompatiblePointerCast( + expr_typ.clone(), + typ.clone(), + )), + } + } + (typ, _expr_typ) => { + // Convert potential pointers to an integer type for the + // sake of value conversion. 
+ let typ = typ.to_arith(abi)?; + let conv = convert_arith(&typ)?; + let eval = recurse(expr)?; + Ok(new_dyn_evaluator(move |x| conv(eval.eval(x)?))) + } + }, + } + } + Plus(expr) => { + let expr = *expr; + let (_typ, conv) = convert_arith_op(abi, cenv, &expr)?; + let eval = recurse(expr)?; + + Ok(new_dyn_evaluator(move |x| conv(eval.eval(x)?))) + } + Minus(expr) => { + let (_typ, conv) = convert_arith_op(abi, cenv, &expr)?; + + macro_rules! negate { + ($value:expr) => { + match $value { + Value::I64Scalar(x) => conv(Value::I64Scalar(-x)), + Value::U64Scalar(x) => conv(Value::I64Scalar(-(x as i64))), + val => Err(EvalError::IllegalType(val.into_static().ok())), + } + }; + } + + let eval = recurse(*expr)?; + match eval.eval(cenv) { + Err(_) => Ok(new_dyn_evaluator(move |env| { + let value = eval.eval(env)?; + negate!(value) + })), + Ok(value) => { + let value = negate!(value); + Ok(new_dyn_evaluator(move |_| value.clone())) + } + } + } + Tilde(expr) => { + let expr = *expr; + let (typ, _conv) = convert_arith_op(abi, cenv, &expr)?; + let eval = recurse(expr)?; + + macro_rules! complement { + ($unsigned:ty, $signed:ty) => { + Ok(new_dyn_evaluator(move |env| match eval.eval(env)? { + Value::I64Scalar(x) => Ok(Value::I64Scalar((!(x as $signed)) as i64)), + Value::U64Scalar(x) => Ok(Value::U64Scalar((!(x as $unsigned)) as u64)), + val => Err(EvalError::IllegalType(val.into_static().ok())), + })) + }; + } + + use Type::*; + match typ { + Bool => complement!(u8, i8), + U8 | I8 => complement!(u8, i8), + U16 | I16 => complement!(u16, i16), + U32 | I32 => complement!(u32, i32), + U64 | I64 => complement!(u64, i64), + _ => Err(CompileError::NonArithmeticOperand(typ)), + } + } + Bang(expr) => { + let eval = recurse(*expr)?; + + Ok(new_dyn_evaluator(move |env| match eval.eval(env)? 
{ + Value::U64Scalar(x) => Ok(Value::I64Scalar((x == 0).into())), + Value::I64Scalar(x) => Ok(Value::I64Scalar((x == 0).into())), + val => Err(EvalError::IllegalType(val.into_static().ok())), + })) + } + + Addr(expr) => { + let eval = recurse(*expr)?; + Ok(new_dyn_evaluator(move |env| { + let val = eval.eval(env)?; + let val = ScratchBox::Owned(OwnedScratchBox::new_in(val, env.scratch())); + Ok(Value::Addr(val)) + })) + } + Deref(expr) => recurse(Subscript(expr, Box::new(IntConstant(Type::I32, 0)))), + + // Since there can be sequence points inside an expression in a number + // of ways, we would need a mutable environment to keep track of it, so + // ignore it for now as this does not seem to be used in current + // kernels. + // https://port70.net/~nsz/c/c11/n1570.html#C + expr @ (PostInc(_) | PostDec(_) | PreInc(_) | PreDec(_)) => cannot_handle(expr), + expr @ Assign(..) => cannot_handle(expr), + + Ternary(cond, lhs, rhs) => { + let lhs = *lhs; + let rhs = *rhs; + + let eval_cond = recurse(*cond)?; + let eval_lhs = recurse(lhs.clone())?; + let eval_rhs = recurse(rhs.clone())?; + + let lhs_typ = lhs.typ(cenv, abi)?; + let rhs_typ = rhs.typ(cenv, abi)?; + + let lhs_info = lhs_typ.arith_info(); + let rhs_info = rhs_typ.arith_info(); + + match (lhs_info, rhs_info) { + (Some(_), Some(_)) => { + let (_, conv_lhs, conv_rhs) = convert_arith_ops(abi, lhs_typ, rhs_typ)?; + Ok(new_dyn_evaluator(move |env| match eval_cond.eval(env)? { + Value::U64Scalar(0) | Value::I64Scalar(0) => { + conv_rhs(eval_rhs.eval(env)?) + } + _ => conv_lhs(eval_lhs.eval(env)?), + })) + } + _ => Ok(new_dyn_evaluator(move |env| match eval_cond.eval(env)? 
{ + Value::U64Scalar(0) | Value::I64Scalar(0) => eval_rhs.eval(env), + _ => eval_lhs.eval(env), + })), + } + } + CommaExpr(mut exprs) => recurse(exprs.pop().unwrap()), + + Subscript(expr, idx) => { + let expr = *expr; + let eval_idx = recurse(*idx)?; + let eval_expr = recurse(expr.clone())?; + + match eval_idx.eval(cenv) { + // If we access element 0 at compile time, that is simply + // dereferencing the value as a pointer. + Ok(Value::U64Scalar(0) | Value::I64Scalar(0)) => { + let expr_typ = expr.typ(cenv, abi)?; + let expr_typ = expr_typ.resolve_wrapper(); + + match expr_typ { + // Dereferencing a pointer to array gives the array, + // which in most contexts will behave like the + // address to the array when manipulated except when + // used with & (address of first array element) and + // sizeof operators (number of elements in the + // array). Dereferencing such address will work with + // this interpreter, and the sizeof() implementation + // is done by inspecting the type only. + Type::Pointer(inner) if matches!(inner.deref(), Type::Array(..)) => recurse(Expr::Cast(inner.deref().clone(), Box::new(expr))), + Type::Pointer(typ) | Type::Array(typ, ..) => { + // We might need the conversion as it is legal to cast e.g. an int* to a char* + let conv = convert_arith(typ).unwrap_or(Box::new(|x| Ok(x))); + Ok(new_dyn_evaluator(move |env| { + conv(eval_expr.eval(env)?.get(env, 0)?) + })) + } + _ => cannot_handle(expr), + } + } + _ => Ok(new_dyn_evaluator(move |env| { + let idx: u64 = match eval_idx.eval(env)? 
{ + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }?; + let idx: usize = idx.try_into().unwrap(); + eval_expr.eval(env)?.get(env, idx) + })), + } + } + + Mul(lhs, rhs) => binop!(None, lhs, rhs, |x, y| x * y), + Div(lhs, rhs) => binop!(None, lhs, rhs, |x, y| x / y), + Mod(lhs, rhs) => binop!(None, lhs, rhs, |x, y| x % y), + Add(lhs, rhs) => binop!(Some(Add), lhs, rhs, |x, y| x + y), + Sub(lhs, rhs) => binop!(Some(Sub), lhs, rhs, |x, y| x - y), + + BitAnd(lhs, rhs) => binop!(None, lhs, rhs, |x, y| x & y), + BitOr(lhs, rhs) => binop!(None, lhs, rhs, |x, y| x | y), + BitXor(lhs, rhs) => binop!(None, lhs, rhs, |x, y| x ^ y), + + Eq(lhs, rhs) => comp!(lhs, rhs, |x, y| (x == y).into()), + NEq(lhs, rhs) => comp!(lhs, rhs, |x, y| (x != y).into()), + LoEq(lhs, rhs) => comp!(lhs, rhs, |x, y| (x <= y).into()), + HiEq(lhs, rhs) => comp!(lhs, rhs, |x, y| (x >= y).into()), + Lo(lhs, rhs) => comp!(lhs, rhs, |x, y| (x < y).into()), + Hi(lhs, rhs) => comp!(lhs, rhs, |x, y| (x > y).into()), + + And(lhs, rhs) => comp!(lhs, rhs, |x, y| ((x != 0) && (y != 0)).into()), + Or(lhs, rhs) => comp!(lhs, rhs, |x, y| ((x != 0) || (y != 0)).into()), + + LShift(lhs, rhs) => shift!(lhs, rhs, |x, y| x << y), + RShift(lhs, rhs) => shift!(lhs, rhs, |x, y| x >> y), + + ExtensionMacro(desc) => { + let kind = &desc.kind; + match kind { + ExtensionMacroKind::ObjectLike { value, .. } => { + let value = value.clone(); + Ok(new_dyn_evaluator(move |_env| Ok(value.clone()))) + } + // We cannot do anything with a bare function-like macro, it has + // to be applied to an expression, at which point the parser + // gives us a ExtensionMacroCall. + ExtensionMacroKind::FunctionLike { .. } => cannot_handle(ExtensionMacro(desc)), + } + } + ExtensionMacroCall(call) => (call.compiler.compiler)(cenv, abi), + + expr @ FuncCall(..) 
=> cannot_handle(expr), + }?; + + // Compile-time evaluation, if that succeeds we simply replace the evaluator + // by a closure that clones the precomputed value. + match eval.eval(cenv) { + Ok(value) => match value.into_static() { + Err(_) => Ok(eval), + Ok(value) => Ok(new_dyn_evaluator(move |_| Ok(value.clone()))), + }, + Err(_err) => Ok(eval), + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Mutex; + + use nom::AsBytes; + + use super::*; + use crate::{ + cparser::{CGrammar, CGrammarCtx, DynamicKind, Type}, + grammar::PackratGrammar as _, + header::{Abi, Endianness, LongSize}, + parser::tests::{run_parser, zero_copy_to_str}, + }; + + #[derive(Clone, Copy)] + enum Stage { + Compile, + Run, + } + + struct TestEnv { + string: String, + scratch: ScratchAlloc, + stage: Arc>, + } + + impl<'ce> CompileEnv<'ce> for TestEnv { + #[inline] + fn field_typ(&self, id: &str) -> Result { + match id { + "runtime_u32_field" => Ok(Type::U32), + "runtime_zero_field" => Ok(Type::U32), + "u32_field" => Ok(Type::U32), + "u32_array_field" => Ok(Type::Array(Box::new(Type::U32), ArrayKind::Fixed(Ok(2)))), + "u32_dynarray_field" => Ok(Type::Array( + Box::new(Type::U32), + ArrayKind::Dynamic(DynamicKind::Dynamic), + )), + "str_field" => Ok(Type::Pointer(Box::new(Type::U8))), + + "string_field" => Ok(Type::Pointer(Box::new(Type::U8))), + "owned_string_field" => Ok(Type::Pointer(Box::new(Type::U8))), + "runtime_u32_ptr" => Ok(Type::Pointer(Box::new(Type::U32))), + "runtime_void_ptr" => Ok(Type::Pointer(Box::new(Type::Void))), + "runtime_char_ptr" => Ok(Type::Pointer(Box::new(Type::U8))), + id => Err(CompileError::UnknownField(id.into())), + } + } + + #[inline] + fn field_getter(&self, id: &str) -> Result, CompileError> { + match id { + "runtime_u32_field" => { + let stage = Arc::clone(&self.stage); + Ok(new_dyn_evaluator(move |_env| match *stage.lock().unwrap() { + Stage::Compile => Err(EvalError::NoEventData), + Stage::Run => Ok(Value::U64Scalar(44)), + })) + } + 
"runtime_zero_field" => { + let stage = Arc::clone(&self.stage); + Ok(new_dyn_evaluator(move |_env| match *stage.lock().unwrap() { + Stage::Compile => Err(EvalError::NoEventData), + Stage::Run => Ok(Value::U64Scalar(0)), + })) + } + "u32_field" => Ok(new_dyn_evaluator(|_| Ok(Value::U64Scalar(42)))), + "u32_array_field" => Ok(new_dyn_evaluator(|_| { + Ok(Value::U32Array([42, 43].as_ref().into())) + })), + "u32_dynarray_field" => Ok(new_dyn_evaluator(|_| { + Ok(Value::U32Array([42, 43].as_ref().into())) + })), + "str_field" => Ok(new_dyn_evaluator(|_| { + Ok(Value::Str(Str::new_owned("hello world".into()))) + })), + + "string_field" => { + let s = Arc::from(&*self.string); + Ok(new_dyn_evaluator(move |_| { + Ok(Value::Str(Str::new_arc(Arc::clone(&s)))) + })) + } + "owned_string_field" => { + let array = Value::Str(Str::new_owned((&self.string).into())); + Ok(new_dyn_evaluator(move |_| Ok(array.clone()))) + } + "runtime_u32_ptr" | "runtime_void_ptr" | "runtime_char_ptr" => Ok(new_dyn_evaluator(|_| { + Ok(Value::U64Scalar(42)) + })), + id => Err(CompileError::UnknownField(id.into())), + } + } + } + + impl<'ee> EvalEnv<'ee> for TestEnv { + // #[inline] + // fn field_getter(&self, id: &str) -> Result Result>, CompileError> { + // Ok(Box::new(|_| Ok(Value::U64Scalar(42)))) + // } + + fn header(&self) -> Result<&Header, EvalError> { + Err(EvalError::NoHeader) + } + + fn deref_static(&self, addr: Address) -> Result, EvalError> { + match addr { + 42 => Ok(Value::Str(Str::new_borrowed("hello world"))), + 43 => Ok(Value::U64Scalar(105)), + 44 => Ok(Value::U64Scalar(257)), + 45 => Ok(Value::U64Scalar(44)), + 46 => Ok(Value::U64Scalar(45)), + 47 => Ok(Value::U32Array(Array::Borrowed(&[30, 31, 32, 33]))), + addr => Err(EvalError::CannotDeref(addr)), + } + } + + fn event_data(&self) -> Result<&[u8], EvalError> { + Err(EvalError::NoEventData) + } + + #[inline] + fn scratch(&self) -> &ScratchAlloc { + &self.scratch + } + } + + #[test] + fn interp_test() { + fn test(src: &[u8], 
expected: Value<'_>) { + let stage = Arc::new(Mutex::new(Stage::Compile)); + let env = TestEnv { + scratch: ScratchAlloc::new(), + string: "foobar".into(), + stage: Arc::clone(&stage), + }; + let abi = Abi { + long_size: LongSize::Bits64, + endianness: Endianness::Little, + char_signedness: Signedness::Unsigned, + }; + let parser = CGrammar::expr(); + let ctx = CGrammarCtx::new(&abi); + let input = CGrammar::make_span(src, &ctx); + let ast = run_parser(input.clone(), parser); + let input = zero_copy_to_str(input.as_bytes()); + let compiled = ast + .compile(&env, &abi) + .unwrap_or_else(|err| panic!("Error while compiling {input:?}: {err}")); + + *stage.lock().unwrap() = Stage::Run; + + let expr = compiled + .eval(&env) + .unwrap_or_else(|err| panic!("Error while interpreting {input:?}: {err}")); + assert_eq!(expr, expected, "while interpreting {input:}") + } + + fn signed(x: i64) -> Value<'static> { + Value::I64Scalar(x) + } + fn unsigned(x: u64) -> Value<'static> { + Value::U64Scalar(x) + } + fn addr(x: Value<'static>) -> Value<'static> { + Value::Addr(ScratchBox::Arc(Arc::new(x))) + } + + let hello_world = Value::Str(Str::new_arc(Arc::from("hello world"))); + + // Literals + test(b"0", signed(0)); + test(b"1", signed(1)); + test(b"-1", signed(-1)); + test(b"-1u", unsigned(4294967295)); + test(b"-(1u)", unsigned(4294967295)); + test(b"-(-(-(1u)))", unsigned(4294967295)); + test(b"-(-(1u))", unsigned(1)); + test(b"-(-(1UL))", unsigned(1)); + + test(br#""hello world""#, hello_world.clone()); + + test(b"true", signed(1)); + test(b"false", signed(0)); + + // Basic arithmetic + test(b"(-1)", signed(-1)); + test(b"1+2", signed(3)); + test(b"1u+2u", unsigned(3)); + test(b"1+2u", unsigned(3)); + test(b"1u+2", unsigned(3)); + test(b"(uint16_t)1u+(s32)2", unsigned(3)); + + test(b"-1+2", signed(1)); + test(b"(-1)+2", signed(1)); + test(b"2+(-1)", signed(1)); + test(b"2+((-1)*4)", signed(-2)); + test(b"(-1)*4", signed(-4)); + test(b"1+TASK_COMM_LEN", signed(17)); + 
test(b"-TASK_COMM_LEN + 1", signed(-15)); + test(b"-(s32)TASK_COMM_LEN + 1", signed(-15)); + + test(b"-1-2", signed(-3)); + test(b"1-TASK_COMM_LEN", signed(-15)); + test(b"-TASK_COMM_LEN - 1", signed(-17)); + + test(b"-TASK_COMM_LEN - 1u", unsigned(4294967279)); + + test(b"10 % 2", signed(0)); + test(b"11 % 2", signed(1)); + test(b"-11 % 2", signed(-1)); + test(b"11 % -2", signed(1)); + test(b"-11 % -2", signed(-1)); + test(b"((s64)(-11)) % ((s16)(-2))", signed(-1)); + + test(b"42 + ((-1) * 4)", signed(42 - 4 * 1)); + test(b"(( signed long)42) + ((-1) * 4)", signed(42 - 4 * 1)); + test(b"((unsigned long)42) - 4", unsigned(42 - 4 * 1)); + test(b"((unsigned long)42) + ((unsigned long)-4)", unsigned(42 - 4 * 1)); + test(b"((unsigned long)42) + (0-4)", unsigned(42 - 4 * 1)); + test(b"((unsigned long)42) + (-4)", unsigned(42 - 4 * 1)); + test(b"((unsigned long)42) + ((-1) * 4)", unsigned(42 - 4 * 1)); + + // Pointer arithmetic + test(b"(unsigned int*)(((unsigned long)(unsigned int*)42) -4)", unsigned(42 - 4 * 1)); + test(b"(unsigned int*)(((unsigned long)(unsigned int*)42) + (-4))", unsigned(42 - 4 * 1)); + test(b"((unsigned long)(unsigned int*)42) + ((-1) * 4)", unsigned(42 - 4 * 1)); + test(b"(unsigned int*)(((unsigned long)(unsigned int*)42) + ((-1) * 4))", unsigned(42 - 4 * 1)); + + test(b"REC->runtime_u32_ptr", unsigned(42)); + test(b"REC->runtime_u32_ptr + 1", unsigned(42 + 4 * 1)); + test(b"REC->runtime_u32_ptr - 1", unsigned(42 - 4 * 1)); + test(b"REC->runtime_u32_ptr + (-1)", unsigned(42 - 4 * 1)); + test(b"1 + REC->runtime_u32_ptr", unsigned(42 + 4 * 1)); + test(b"(-1) + 42", signed(42 - 1)); + test(b"(-1) + REC->runtime_u32_ptr", unsigned(42 - 4 * 1)); + + test(b"REC->runtime_void_ptr", unsigned(42)); + test(b"REC->runtime_void_ptr + 1", unsigned(42 + 1)); + test(b"REC->runtime_void_ptr - 1", unsigned(42 - 1)); + test(b"REC->runtime_void_ptr + (-1)", unsigned(42 - 1)); + test(b"1 + REC->runtime_void_ptr", unsigned(42 + 1)); + test(b"(-1) + 
REC->runtime_void_ptr", unsigned(42 - 1)); + + test(b"REC->runtime_char_ptr", unsigned(42)); + test(b"REC->runtime_char_ptr + 1", unsigned(42 + 1)); + test(b"REC->runtime_char_ptr - 1", unsigned(42 - 1)); + test(b"REC->runtime_char_ptr + (-1)", unsigned(42 - 1)); + test(b"1 + REC->runtime_char_ptr", unsigned(42 + 1)); + test(b"(-1) + REC->runtime_char_ptr", unsigned(42 - 1)); + + // Integer overflow + test(b"1 == 1", signed(1)); + test(b"1 == 2", signed(0)); + test(b"-1 == 4294967295", signed(0)); + test(b"-1u == 4294967295", signed(1)); + test(b"-1 == 4294967295u", signed(1)); + test(b"-1u == 4294967295u", signed(1)); + test(b"(u64)-1u == (unsigned int)4294967295u", signed(1)); + + // Comparisons + test(b"1 > 2", signed(0)); + test(b"2 > 1", signed(1)); + test(b"1 > -1u", signed(0)); + test(b"-1u > 1", signed(1)); + test(b"-1u < 1", signed(0)); + test(b"(u32)-1u > (s32)1", signed(1)); + + // Shifts + test(b"2 >> 1", signed(1)); + test(b"-2 >> 1", signed(-1)); + test(b"2 << 1", signed(4)); + test(b"-2 << 1", signed(-4)); + test(b"(s8)-2 << (s64)1", signed(-4)); + test(b"(s8)-2 << (u64)1", signed(-4)); + + // Bitwise not + test(b"~0", signed(-1)); + test(b"~0u", unsigned(4294967295)); + test(b"~(u8)0u", unsigned(4294967295)); + test(b"~((u32)0)", unsigned(4294967295)); + test(b"~0 == -1", signed(1)); + test(b"(s8)~0 == -1", signed(1)); + test(b"(u32)~0 == -1u", signed(1)); + test(b"(u64)~0 == -1ull", signed(1)); + + // Logical not + test(b"!0", signed(1)); + test(b"!1", signed(0)); + test(b"!42", signed(0)); + test(b"!(s32)42", signed(0)); + test(b"!(u32)42", signed(0)); + + // Logical or + test(b"1 && 2", signed(1)); + + // Ternary + test(b"1 ? 1 : 0", signed(1)); + test(b"0 ? 1 : 0", signed(0)); + test(b"0 ? 1 : 0u", unsigned(0)); + test(b"-12 ? 42u : 0", unsigned(42)); + test(b"(s32)-12 ? (u8)42 : 0", unsigned(42)); + test(b"1 ? (int *)42 : (int *)43", unsigned(42)); + test(b"1 ? (int *)42 : (void *)43", unsigned(42)); + test(b"1 ? 
(void *)42 : (int *)43", unsigned(42)); + test(b"1 ? (void *)42 : (void *)43", unsigned(42)); + test(b"1 ? (int *)42 : (int *)0", unsigned(42)); + test(b"1 ? (int *)42 : (void *)0", unsigned(42)); + test(b"1 ? (void *)42 : (int *)0", unsigned(42)); + test(b"1 ? (void *)42 : (void *)0", unsigned(42)); + test(b"1 ? (int *)0 : (int *)0", unsigned(0)); + test(b"1 ? (int *)0 : (void *)0", unsigned(0)); + test(b"1 ? (void *)0 : (int *)0", unsigned(0)); + test(b"1 ? (void *)0 : (void *)0", unsigned(0)); + test(b"1 ? (int *)0 : (int *)42", unsigned(0)); + test(b"1 ? (int *)0 : (void *)42", unsigned(0)); + test(b"1 ? (void *)0 : (int *)42", unsigned(0)); + test(b"1 ? (void *)0 : (void *)42", unsigned(0)); + + // Casts + test(b"(int)0", signed(0)); + test(b"(s8)0", signed(0)); + test(b"(unsigned int)0", unsigned(0)); + test(b"(u32)0", unsigned(0)); + test(b"(unsigned int)-1", unsigned(4294967295)); + test(b"(u32)-1", unsigned(4294967295)); + test(b"(unsigned int)(unsigned char)-1", unsigned(255)); + test(b"(u32)(u8)-1", unsigned(255)); + test(b"(int)(unsigned int)-1", signed(-1)); + test(b"(s32)(u64)-1", signed(-1)); + test(b"(int)4294967295", signed(-1)); + test(b"(s32)4294967295", signed(-1)); + test(b"(int*)&1", addr(Value::I64Scalar(1))); + test(b"(s32*)&1", addr(Value::I64Scalar(1))); + test(b"(int*)1", unsigned(1)); + test(b"(s16*)1", unsigned(1)); + test(b"(void *)1ull", unsigned(1)); + test(b"((__u16)(__le16)1)", unsigned(1)); + + // Sizeof type + test(b"sizeof(char)", unsigned(1)); + test(b"sizeof(int)", unsigned(4)); + test(b"sizeof(unsigned long)", unsigned(8)); + test(b"sizeof(int *)", unsigned(8)); + test(b"sizeof(u8)", unsigned(1)); + test(b"sizeof(u64)", unsigned(8)); + test(b"sizeof(u8 *)", unsigned(8)); + + // Sizeof expr + test(b"sizeof(1)", unsigned(4)); + test(b"sizeof 1", unsigned(4)); + test(b"sizeof(1l)", unsigned(8)); + test(b"sizeof((long)1)", unsigned(8)); + test(b"sizeof((u64)1)", unsigned(8)); + test(b"sizeof(&1)", unsigned(8)); + 
test(b"sizeof((u8)&1)", unsigned(1)); + test(b"sizeof(*(unsigned int (*)[10])50)", unsigned(40)); + + // Address and deref + test(b"&1", addr(signed(1))); + test(b"(void *)&1", addr(signed(1))); + test(b"*(void *)&1", signed(1)); + test(b"*(u8*)(void *)&1", unsigned(1)); + test(b"*(u8*)(void *)&257", unsigned(1)); + test(b"*(u8*)(void *)&257ull", unsigned(1)); + test(b"*(u64*)(void*)(u64*)(void *)&1ull", unsigned(1)); + test(b"*(u64*)(u8*)(void*)(u64*)(void *)&1ull", unsigned(1)); + test(b"*(u32*)(u8*)(void*)(u32*)(void *)&1u", unsigned(1)); + test(b"&(u32)1", addr(unsigned(1))); + test(b"&REC->runtime_u32_field", addr(unsigned(44))); + test(b"*(unsigned int *)REC->runtime_u32_field", unsigned(257)); + test(b"*(u32 *)REC->runtime_u32_field", unsigned(257)); + test(b"*(signed int *)REC->runtime_u32_field", signed(257)); + test(b"*(s32 *)REC->runtime_u32_field", signed(257)); + test(b"*(unsigned int *)&REC->runtime_u32_field", unsigned(44)); + test(b"*(u32 *)&REC->runtime_u32_field", unsigned(44)); + test(b"(signed int)*&REC->runtime_u32_field", signed(44)); + test(b"(s32)*&REC->runtime_u32_field", signed(44)); + test(b"*&1", signed(1)); + test(b"*&*&1", signed(1)); + test(b"(s32)*&*&1", signed(1)); + test(b"*(&1)", signed(1)); + test(b"*(2, &1)", signed(1)); + test(b"*(0 ? &1 : &2)", signed(2)); + test(b"*(1 ? &1 : &2)", signed(1)); + test(b"*(1 ? &(s32)1 : &(s32)2)", signed(1)); + test(b"*(1 ? 
&(s32)1 : &(int)2)", signed(1)); + + test(b"*(char *)42", unsigned(104)); + test(b"*(u8 *)42", unsigned(104)); + test(b"*(unsigned char *)42", unsigned(104)); + test(b"*(signed char *)42", signed(104)); + test(b"*(s8 *)42", signed(104)); + test(b"*(unsigned long *)43", unsigned(105)); + test(b"*(u64 *)43", unsigned(105)); + test(b"*(char *)44", unsigned(1)); + test(b"*(u8 *)44", unsigned(1)); + test(b"*(char *)(int *)(short *)44", unsigned(1)); + test(b"*(char *)(s32 *)(short *)44", unsigned(1)); + test(b"*(u8 *)(int *)(short *)44", unsigned(1)); + test(b"*(u8 *)(s32 *)(short *)44", unsigned(1)); + test(b"*(u8 *)(s32 *)(s16 *)44", unsigned(1)); + test(b"*(char *)(int *)(s16 *)44", unsigned(1)); + test(b"*(u8 *)(int *)(s16 *)44", unsigned(1)); + + test(b"((char *)42)[0]", unsigned(104)); + test(b"((u8 *)42)[0]", unsigned(104)); + test(b"((char *)42)[1]", unsigned(101)); + test(b"((u8 *)42)[1]", unsigned(101)); + test(b"*(int *)44", signed(257)); + test(b"*(s32 *)44", signed(257)); + test(b"*(unsigned int *)44", unsigned(257)); + test(b"*(u32 *)44", unsigned(257)); + test(b"*(unsigned int *)47", unsigned(30)); + test(b"( *(unsigned int (*)[10])47 )[1]", unsigned(31)); + test(b"( *(u32 (*)[10])47 )[1]", unsigned(31)); + test(b"((u32 *)47)[1]", unsigned(31)); + + test(b"**(unsigned int **)45", unsigned(257)); + test(b"**(u32 **)45", unsigned(257)); + test(b"( *(unsigned int * (*)[10])44 )[0]", unsigned(257)); + test(b"( *(u32 * (*)[10])44 )[0]", unsigned(257)); + + // Array + test(b"(&1)[0]", signed(1)); + test(b"((s32*)&1)[0]", signed(1)); + test(b"(42 ? &1 : &2)[0]", signed(1)); + test(b"(42 ? (s8*)&1 : (s8*)&2)[0]", signed(1)); + test(b"(0 ? &1 : &2)[0]", signed(2)); + test(b"(0 ? (s8*)&1 : (signed char*)&2)[0]", signed(2)); + test(b"(REC->runtime_zero_field ? &1 : &2)[0]", signed(2)); + test(b"((s8)REC->runtime_zero_field ? 
&1 : &2)[0]", signed(2)); + + // Field access + test(b"REC->u32_field", unsigned(42)); + test(b"(u64)REC->u32_field", unsigned(42)); + test(b"(*&REC) -> u32_field", unsigned(42)); + test(b"(*(0 ? &(REC) : &(REC))) -> u32_field", unsigned(42)); + test(b"(*(1 ? &(REC) : &(REC))) -> u32_field", unsigned(42)); + + test(b"sizeof(REC->u32_array_field)", unsigned(4 * 2)); + test(b"sizeof((int [2])REC->u32_array_field)", unsigned(4 * 2)); + test(b"sizeof((u8 [2])REC->u32_array_field)", unsigned(2)); + test(b"REC->u32_array_field[0]", unsigned(42)); + test(b"*REC->u32_array_field", unsigned(42)); + + test(b"REC->u32_dynarray_field[0]", unsigned(42)); + test(b"((u32 *)REC->u32_dynarray_field)[0]", unsigned(42)); + test(b"REC->u32_dynarray_field[1]", unsigned(43)); + test(b"((u32 *)REC->u32_dynarray_field)[1]", unsigned(43)); + test(b"*REC->u32_dynarray_field", unsigned(42)); + test(b"*(u32*)REC->u32_dynarray_field", unsigned(42)); + + test(b"*REC->owned_string_field", unsigned(102)); + test(b"REC->owned_string_field[6]", unsigned(0)); + test(b"((char *)REC->owned_string_field)[6]", unsigned(0)); + test(b"REC->str_field", hello_world.clone()); + test(b"(char *)REC->str_field", hello_world.clone()); + // Unfortunately, it is not easy to preserve the Array value + // across a &* chain, as this would either necessitate to not simplify + // the *& chains in the sub-expression, or be very brittle and strictly + // match &* with nothing in-between which is quite useless. + // + // So we end up with dereferencing the Array, which provides its + // first item, and then we take the address of that. 
+ test(b"*&*REC->str_field", unsigned(104)); + test(b"*(u8*)&*REC->str_field", unsigned(104)); + test(b"*REC->str_field", unsigned(104)); + test(b"*(u8*)REC->str_field", unsigned(104)); + test(b"REC->str_field[0]", unsigned(104)); + test(b"((u8*)REC->str_field)[0]", unsigned(104)); + test(b"REC->str_field[1]", unsigned(101)); + test(b"((u8*)REC->str_field)[1]", unsigned(101)); + test(b"REC->str_field[6]", unsigned(119)); + test(b"((u8*)REC->str_field)[6]", unsigned(119)); + test(b"REC->str_field[11]", unsigned(0)); + test(b"((u8*)REC->str_field)[11]", unsigned(0)); + + test(b"*(signed char*)REC->str_field", signed(104)); + test(b"*(s8*)REC->str_field", signed(104)); + test(b"(int)*REC->str_field", signed(104)); + test(b"(s32)*REC->str_field", signed(104)); + test(b"(int)REC->str_field[0]", signed(104)); + test(b"(s32)REC->str_field[0]", signed(104)); + + // Combined + test(b"(65536/((1UL) << 12) + 1)", unsigned(17)); + test(b"(65536/((1UL) << 12) + (s32)1)", unsigned(17)); + + test(b"*(int*)(&(-1))", signed(-1)); + test(b"*(s32*)((s32 *)&(-1))", signed(-1)); + test(b"*(unsigned int*)(&-1u)", unsigned(4294967295)); + test(b"*(u32 *)(&-1u)", unsigned(4294967295)); + test(b"*(unsigned int*)(char*)(&-1u)", unsigned(4294967295)); + test(b"*(u32 *)(u8 *)(&-1u)", unsigned(4294967295)); + test(b"*(unsigned int *)(u8 *)(&-1u)", unsigned(4294967295)); + // This is not UB since any value can be accessed via a char pointer: + // https://port70.net/~nsz/c/c11/n1570.html#6.5p7 + test(b"*(unsigned char*)(int*)(&(-1))", unsigned(255)); + test(b"*(u8 *)(int*)(&(-1))", unsigned(255)); + + test(b"(int*)1 == (int*)1", signed(1)); + test(b"(s32*)1 == (s32*)1", signed(1)); + test(b"(int*)1 == (char*)1", signed(1)); + test(b"(s32*)1 == (u8*)1", signed(1)); + + test(b"(int)(int*)1 == 1", signed(1)); + test(b"(s32)(s32*)1 == 1", signed(1)); + test(b"(int)(s32*)1 == 1", signed(1)); + test(b"(s32)(int*)1 == 1", signed(1)); + test(b"(int)(int*)1 == 2", signed(0)); + test(b"(s32)(int*)1 == 2", 
signed(0)); + test(b"(int)(s32*)1 == 2", signed(0)); + test(b"(s32)(s32*)1 == 2", signed(0)); + test(b"(char)(int*)256 == 0", signed(1)); + test(b"(u8)(s32*)256 == 0", signed(1)); + test(b"(signed char)(s32*)256 == 0", signed(1)); + + test( + b"*((char)(int*)256 == 0 ? (&42, &43) : &2) == 43", + signed(1), + ); + test( + b"*((u8)(s32*)256 == 0 ? ((s32*)&42, &43) : &(s32)2) == (s64)43", + signed(1), + ); + + test(b"1 ? '*' : ' '", signed(42)); + test(b"(1 && 2) ? '*' : ' '", signed(42)); + test(b"1 && 2 ? '*' : ' '", signed(42)); + test(b"(int) 1 && (int) 2 ? '*' : ' '", signed(42)); + + // Extension macros + test(b"__builtin_constant_p(sizeof(struct page))", signed(0)); + test(br#"__builtin_constant_p("foo")"#, signed(0)); + test(br#"__builtin_constant_p("(a)")"#, signed(0)); + test(br#"__builtin_constant_p("(a")"#, signed(0)); + test(br#"__builtin_constant_p("a)")"#, signed(0)); + test(br#"__builtin_constant_p("a)\"")"#, signed(0)); + test(br#"__builtin_constant_p("\"a)")"#, signed(0)); + test(br#"__builtin_constant_p(')')"#, signed(0)); + test(br#"__builtin_constant_p('\'', "'")"#, signed(0)); + } +} diff --git a/tools/trace-parser/traceevent/src/closure.rs b/tools/trace-parser/traceevent/src/closure.rs new file mode 100644 index 0000000000..2547f12ab5 --- /dev/null +++ b/tools/trace-parser/traceevent/src/closure.rs @@ -0,0 +1,34 @@ +macro_rules! make_closure_coerce { + ($name:ident, $bound1:tt $(+ $bounds2:tt)*) => { + #[inline] + fn $name(f: F) -> F + where + F: $bound1 $(+ $bounds2)* + { + f + } + } +} +pub(crate) use make_closure_coerce; + +macro_rules! make_closure_coerce_type { + ($name:ident, $ty:ty) => { + #[inline] + fn $name(f: $ty) -> $ty { + f + } + }; +} +pub(crate) use make_closure_coerce_type; + +// This is a workaround for the broken HRTB inference: +// https://github.com/rust-lang/rust/issues/41078 +macro_rules! 
closure { + ($bound1:tt $(+ $bounds2:tt)*, $closure:expr) => { + { + $crate::closure::make_closure_coerce!(coerce, $bound1 $(+ $bounds2)*); + coerce($closure) + } + } +} +pub(crate) use closure; diff --git a/tools/trace-parser/traceevent/src/compress.rs b/tools/trace-parser/traceevent/src/compress.rs new file mode 100644 index 0000000000..a6746f6626 --- /dev/null +++ b/tools/trace-parser/traceevent/src/compress.rs @@ -0,0 +1,166 @@ +use core::{ + cell::RefCell, + fmt::{Debug, Formatter}, +}; +use std::io; + +use thread_local::ThreadLocal; + +use crate::header::MemSize; + +pub trait Decompressor: Send + Sync { + fn decompress_into(&self, src: &[u8], dst: &mut [u8]) -> io::Result<()>; + + fn decompress<'a>(&'a mut self, src: &'a [u8], dst_count: MemSize) -> io::Result> { + let mut buffer = vec![0; dst_count]; + self.decompress_into(src, &mut buffer)?; + Ok(buffer) + } + + fn to_dyn(&self) -> Box; +} + +pub struct DynDecompressor { + inner: Box, +} + +impl Clone for DynDecompressor { + #[inline] + fn clone(&self) -> Self { + DynDecompressor { + inner: self.inner.to_dyn(), + } + } +} + +impl DynDecompressor { + pub fn new(inner: D) -> Self { + DynDecompressor { + inner: Box::new(inner), + } + } +} + +impl Debug for DynDecompressor { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + f.debug_struct("Decompressor").finish_non_exhaustive() + } +} + +impl Decompressor for DynDecompressor { + #[inline] + fn decompress_into(&self, src: &[u8], dst: &mut [u8]) -> io::Result<()> { + self.inner.decompress_into(src, dst) + } + + #[inline] + fn to_dyn(&self) -> Box { + Box::new(self.clone()) + } +} + +#[cfg(target_arch = "x86_64")] +pub(crate) struct ZstdDecompressor { + inner: ThreadLocal>>, +} + +#[cfg(target_arch = "x86_64")] +impl ZstdDecompressor { + pub fn new() -> Self { + ZstdDecompressor { + inner: ThreadLocal::new(), + } + } + + fn inner(&self) -> &RefCell> { + self.inner.get_or(move || { + RefCell::new( + 
zstd::bulk::Decompressor::new().expect("Could not create zstd::bulk::Decompressor"), + ) + }) + } +} + +#[cfg(target_arch = "x86_64")] +impl Decompressor for ZstdDecompressor { + #[inline] + fn to_dyn(&self) -> Box { + Box::new(Self::new()) + } + + fn decompress_into(&self, src: &[u8], dst: &mut [u8]) -> io::Result<()> { + let count = self.inner().borrow_mut().decompress_to_buffer(src, dst)?; + if count == dst.len() { + Ok(()) + } else { + Err(io::Error::new( + io::ErrorKind::Other, + format!("Zstd expected {} bytes, decompressed {count}", dst.len()), + )) + } + } +} + +#[cfg(not(target_arch = "x86_64"))] +pub(crate) struct ZstdDecompressor; + +#[cfg(not(target_arch = "x86_64"))] +impl ZstdDecompressor { + pub fn new() -> Self { + ZstdDecompressor + } +} + +#[cfg(not(target_arch = "x86_64"))] +impl Decompressor for ZstdDecompressor { + #[inline] + fn to_dyn(&self) -> Box { + Box::new(Self::new()) + } + + fn decompress_into(&self, src: &[u8], dst: &mut [u8]) -> io::Result<()> { + use std::io::Read as _; + let mut decoder = ruzstd::StreamingDecoder::new(src).map_err(io::Error::other)?; + decoder.read_exact(dst) + } +} + +pub(crate) struct ZlibDecompressor { + inner: ThreadLocal>, +} + +impl ZlibDecompressor { + pub fn new() -> Self { + ZlibDecompressor { + inner: ThreadLocal::new(), + } + } + pub fn inner(&self) -> &RefCell { + self.inner + .get_or(|| RefCell::new(libdeflater::Decompressor::new())) + } +} + +impl Decompressor for ZlibDecompressor { + #[inline] + fn to_dyn(&self) -> Box { + Box::new(Self::new()) + } + + fn decompress_into(&self, src: &[u8], dst: &mut [u8]) -> io::Result<()> { + let count = self + .inner() + .borrow_mut() + .zlib_decompress(src, dst) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + if count == dst.len() { + Ok(()) + } else { + Err(io::Error::new( + io::ErrorKind::Other, + format!("Zlib expected {} bytes, decompressed {count}", dst.len()), + )) + } + } +} diff --git a/tools/trace-parser/traceevent/src/cparser.rs 
b/tools/trace-parser/traceevent/src/cparser.rs new file mode 100644 index 0000000000..8e1b893e85 --- /dev/null +++ b/tools/trace-parser/traceevent/src/cparser.rs @@ -0,0 +1,4198 @@ +use core::{ + fmt, + fmt::{Debug, Formatter}, + ops::Deref, + str::from_utf8, +}; +use std::{ + rc::Rc, + string::{String as StdString, ToString}, + sync::Arc, +}; + +use bytemuck::cast_slice; +use itertools::Itertools as _; +use nom::{ + branch::alt, + bytes::complete::{is_not, tag, take}, + character::complete::{alpha1, alphanumeric1, anychar, char, u64 as dec_u64}, + combinator::{all_consuming, cut, fail, map, map_res, not, opt, recognize, success}, + error::{context, FromExternalError}, + multi::{fold_many1, many0, many0_count, many1, many_m_n, separated_list0, separated_list1}, + number::complete::u8, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + AsBytes, Finish as _, Parser, +}; +use smartstring::alias::String; + +use crate::{ + cinterp::{ + Bitmap, CompileEnv, CompileError, EmptyEnv, EvalEnv, EvalError, new_dyn_evaluator, Evaluator, + InterpError, Value, + }, + grammar::{grammar, Span}, + header::{Abi, Endianness, FileSize, Identifier, LongSize}, + parser::{ + error, failure, hex_u64, lexeme, map_res_cut, parenthesized, success_with, FromParseError, + NomError, VerboseParseError, + }, + scratch::{OwnedScratchBox, ScratchVec}, + str::Str, + error::convert_err_impl, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Type { + Void, + + Bool, + + U8, + I8, + + U16, + I16, + + U32, + I32, + + U64, + I64, + + // Opaque type of variables + Variable(Identifier), + + // Complete black box used in cases where we want to completely hide any + // information about the type. 
+ Unknown, + + Typedef(Box, Identifier), + Enum(Box, Identifier), + Struct(Identifier), + Union(Identifier), + Function(Box, Vec), + + Pointer(Box), + + Array(Box, ArrayKind), + DynamicScalar(Box, DynamicKind), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DynamicKind { + Dynamic, + DynamicRel, +} + +#[derive(Debug, Clone)] +pub enum ArrayKind { + Fixed(Result>), + Dynamic(DynamicKind), + ZeroLength, +} + +impl PartialEq for ArrayKind { + fn eq(&self, other: &ArrayKind) -> bool { + match (self, other) { + (ArrayKind::Fixed(Ok(x1)), ArrayKind::Fixed(Ok(x2))) => x1 == x2, + (ArrayKind::Fixed(Err(_)), ArrayKind::Fixed(Err(_))) => true, + (ArrayKind::Dynamic(kind1), ArrayKind::Dynamic(kind2)) => kind1 == kind2, + (ArrayKind::ZeroLength, ArrayKind::ZeroLength) => true, + _ => false, + } + } +} + +impl Eq for ArrayKind {} + +#[derive(Clone)] +pub struct Declarator { + identifier: Option, + // Use Rc<> so that Declarator can be Clone, which is necessary to for the + // packrat cache. + modify_typ: Rc Type>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Declaration { + pub identifier: Identifier, + pub typ: Type, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ParamDeclaration { + pub identifier: Option, + pub typ: Type, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Expr { + Uninit, + Variable(Type, Identifier), + + InitializerList(Vec), + DesignatedInitializer(Box, Box), + CompoundLiteral(Type, Vec), + + IntConstant(Type, u64), + CharConstant(Type, u64), + EnumConstant(Type, Identifier), + StringLiteral(String), + + Addr(Box), + Deref(Box), + Plus(Box), + Minus(Box), + Tilde(Box), + Bang(Box), + Cast(Type, Box), + SizeofType(Type), + SizeofExpr(Box), + PreInc(Box), + PreDec(Box), + PostInc(Box), + PostDec(Box), + + MemberAccess(Box, Identifier), + FuncCall(Box, Vec), + Subscript(Box, Box), + Assign(Box, Box), + + ExtensionMacro(Arc), + ExtensionMacroCall(ExtensionMacroCall), + + Mul(Box, Box), + Div(Box, Box), + Mod(Box, Box), + 
Add(Box, Box), + Sub(Box, Box), + + Eq(Box, Box), + NEq(Box, Box), + LoEq(Box, Box), + HiEq(Box, Box), + Hi(Box, Box), + Lo(Box, Box), + + And(Box, Box), + Or(Box, Box), + + LShift(Box, Box), + RShift(Box, Box), + BitAnd(Box, Box), + BitOr(Box, Box), + BitXor(Box, Box), + + Ternary(Box, Box, Box), + CommaExpr(Vec), + + Evaluated(Type, Value<'static>), +} + +impl Expr { + pub(crate) fn record_field(field: Identifier) -> Self { + Expr::MemberAccess( + // The extra Deref(Addr(...)) layer is important as it accurately + // matches what the C sources would be, i.e. REC->field. This allows + // comparing this expression for equality with parsed source to + // check if we got a typical field access. + Box::new(Expr::Deref(Box::new(Expr::Addr(Box::new(Expr::Variable( + Type::Variable("REC".into()), + "REC".into(), + )))))), + field, + ) + } + #[inline] + pub(crate) fn is_record_field(&self, field: &str) -> bool { + // This must match the record_field() definition + if let Expr::MemberAccess(expr, _field) = self { + if _field == field { + if let Expr::Deref(expr) = (*expr).deref() { + if let Expr::Addr(expr) = (*expr).deref() { + if let Expr::Variable(Type::Variable(name), name2) = (*expr).deref() { + if name == "REC" && name2 == "REC" { + return true; + } + } + } + } + } + } + + false + } +} + +#[derive(Clone)] +pub struct ExtensionMacroCall { + pub args: Vec, + pub desc: Arc, + pub compiler: ExtensionMacroCallCompiler, +} + +impl Debug for ExtensionMacroCall { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + f.write_fmt(format_args!( + "ExtensionMacroCall{{ typ: {:?}, call: {}({}) }}", + &self.compiler.ret_typ, + &self.desc.name, + &StdString::from_utf8_lossy(&self.args) + )) + } +} + +impl PartialEq for ExtensionMacroCall { + fn eq(&self, other: &Self) -> bool { + self.compiler.ret_typ == other.compiler.ret_typ + && self.desc == other.desc + && self.args == other.args + } +} + +impl Eq for ExtensionMacroCall {} + +#[derive(Clone, 
Debug, PartialEq, Eq)] +pub enum ExtensionMacroCallType { + Type(Type), + AsExpr(Box), +} + +impl ExtensionMacroCallType { + pub fn typ<'ce, CE>(&self, cenv: &CE, abi: &Abi) -> Result + where + CE: CompileEnv<'ce>, + { + match self { + ExtensionMacroCallType::Type(typ) => Ok(typ.clone()), + ExtensionMacroCallType::AsExpr(expr) => expr.typ(cenv, abi), + } + } +} + +type Compiler = dyn for<'ceref, 'ce> Fn( + &'ceref (dyn CompileEnv<'ce> + 'ceref), + &Abi, + ) -> Result, CompileError> + + Send + + Sync; + +#[derive(Clone)] +pub struct ExtensionMacroCallCompiler { + pub ret_typ: ExtensionMacroCallType, + pub compiler: Arc, +} + +type FunctionLikeExtensionMacroParser = Box< + dyn for<'a> Fn( + Span<'a, CGrammar>, + ) -> nom::IResult< + Span<'a, CGrammar>, + ExtensionMacroCallCompiler, + NomError>>, + > + Send + + Sync, +>; + +pub(crate) enum ExtensionMacroKind { + FunctionLike { + parser: FunctionLikeExtensionMacroParser, + }, + ObjectLike { + value: Value<'static>, + typ: Type, + }, +} + +pub struct ExtensionMacroDesc { + name: Identifier, + pub(crate) kind: ExtensionMacroKind, +} + +impl ExtensionMacroDesc { + #[inline] + pub fn new_function_like(name: Identifier, parser: FunctionLikeExtensionMacroParser) -> Self { + ExtensionMacroDesc { + name, + kind: ExtensionMacroKind::FunctionLike { parser }, + } + } + + #[inline] + pub fn new_object_like(name: Identifier, typ: Type, value: Value<'static>) -> Self { + ExtensionMacroDesc { + name, + kind: ExtensionMacroKind::ObjectLike { value, typ }, + } + } +} + +impl PartialEq for ExtensionMacroDesc { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +impl Eq for ExtensionMacroDesc {} + +impl Debug for ExtensionMacroDesc { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + f.debug_struct("ExtensionMacroDesc") + .field("name", &self.name) + .finish_non_exhaustive() + } +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum CParseError 
{ + #[error("Could not decode UTF-8 string: {0}")] + DecodeUtf8(StdString), + #[error("No identifier found in a declaration of type {0:?}")] + DeclarationWithoutIdentifier(Type), + #[error("Found nested __data_loc in type: {0:?}")] + NestedDataLoc(Type), + #[error("The outermost type of a dynamic array must be an array: {0:?}")] + DataLocArrayNotArray(Type), + + // Kind of redundant with DataLocWithoutIdentifier but the different error + // message might be genuinely important considered this is not valid ISO C + // syntax to start with. + #[error( + "A __data_loc array declaration is expected to have an identifier following the last []: {0:?}" + )] + DataLocArrayWithoutIdentifier(Type), + #[error("Found no identifier in the scalar __data_loc declaration")] + DataLocWithoutIdentifier, + #[error( + "Found ambiguous identifiers in the scalar __data_loc declaration: \"{0}\" or \"{1}\"" + )] + DataLocAmbiguousIdentifier(Identifier, Identifier), + + #[error("Invalid type name (incompatible int/long/short/char usage)")] + InvalidTypeName, + + #[error( + "Invalid integer constant ({0}): the value does not fit in the range of any of the allowed types" + )] + InvalidIntegerConstant(u64), + + #[error("Character value is out of range ({0}), only 8 bit values are supported")] + CharOutOfRange(u64), + + #[error("Invalid variable identifier \"{0}\". 
Only the REC identifier is recognized as a variable, every other identifier is assumed to be an enumeration constant")] + InvalidVariableIdentifier(Identifier), + + #[error("Could not guess the type of the expression: {0:?}")] + CouldNotGuessType(Expr), + + #[error("String encoding prefix syntax is not supported")] + UnsupportedEncodingPrefix, + + #[error("Object-like macro {0:?} cannot be called since it is not a function-like macro")] + CannotCallObjectLikeMacro(Identifier), + + #[error("Could not recognize input as C(this snippet might use unsupported C constructs): {0}")] + ParseError(VerboseParseError), + + #[error("Could not interpret expression: {0}")] + InterpError(Box), +} + +convert_err_impl!(InterpError, InterpError, CParseError); + +impl FromParseError> for CParseError +where + I: AsRef<[u8]>, + I2: AsRef<[u8]>, +{ + fn from_parse_error(input: I, err: &nom::error::VerboseError) -> Self { + CParseError::ParseError(VerboseParseError::new(input, err)) + } +} + +fn eval_unsigned(expr: Expr, abi: &Abi) -> Result { + expr.eval_const(abi, |res| { + let x = res?; + match x { + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) if x > 0 => Ok(x.unsigned_abs()), + val => Err(InterpError::EvalError(Box::new(EvalError::IllegalType(val.into_static().ok())))), + } + }) +} + +fn resolve_extension_macro(name: &str, abi: &Abi) -> Result { + let abi = abi.clone(); + + fn print_symbolic( + abi: Abi, + ) -> Result { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut( + tuple(( + // value to format + CGrammar::assignment_expr(), + lexeme(char(',')), + // delimiter + move |input| { + if EXACT_MATCH { + success(Expr::Uninit).parse(input) + } else { + terminated(CGrammar::assignment_expr(), lexeme(char(','))) + .parse(input) + } + }, + // flags + terminated( + separated_list0( + lexeme(char(',')), + delimited( + // E.g. 
{(1 << 1), "I_DIRTY_DATASYNC"} + lexeme(char('{')), + separated_pair( + CGrammar::assignment_expr(), + lexeme(char(',')), + CGrammar::assignment_expr(), + ), + lexeme(char('}')), + ), + ), + opt(lexeme(char(','))), + ), + )), + |(val, _, delim, flags)| { + let flags = flags + .into_iter() + .filter_map(|(mask, flag)| -> Option> { + || -> Result, _> { + let mask: u64 = mask.eval_const(&abi, |x| match x? { + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(InterpError::EvalError(Box::new( + EvalError::IllegalType(val.into_static().ok()), + ))), + })?; + + let flag: Option = flag.eval_const(&abi, |s| { + match s? { + // We can end up with a null pointer value, which is + // simply the terminator of the array. It is normally + // provided by __print_symbolic() itself but some + // events specify it explicitly themselves, especially + // when the array is shared with some other formatting + // code. + Value::U64Scalar(0) | Value::I64Scalar(0) => Ok(None), + s => match s.to_str() { + Some(s) => Ok(Some(s.into())), + None => Err(InterpError::EvalError(Box::new( + EvalError::IllegalType(s.into_static().ok()), + ))), + }, + } + })?; + match flag { + Some(flag) => Ok(Some((mask, flag))), + None => Ok(None), + } + }() + .transpose() + }) + .collect::, InterpError>>()?; + + let compiler = Arc::new(move |cenv: &dyn CompileEnv<'_>, abi: &_| { + let cval = val.clone().compile(&cenv, abi)?; + #[allow(clippy::type_complexity)] + let cdelim: Box< + dyn Fn(&dyn EvalEnv) -> Result + Send + Sync, + > = if EXACT_MATCH { + Box::new(|_env| Ok("".into())) + } else { + let cdelim = delim.clone().compile(&cenv, abi)?; + Box::new(move |env| { + let cdelim = cdelim.eval(env)?; + let cdelim = cdelim.deref_ptr(env)?; + match cdelim.to_str() { + Some(s) => Ok(s.into()), + None => Err(EvalError::IllegalType(cdelim.into_static().ok())), + } + }) + }; + + let flags = flags.clone(); + + let eval = new_dyn_evaluator(move |env: &_| { + let val = match cval.eval(env)? 
{ + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }?; + let delim = cdelim(env)?; + let flags = flags.clone(); + + let writer = OwnedScratchBox::new_in( + move |out: &mut dyn fmt::Write| { + let mut closure = || -> Result<(), fmt::Error> { + let mut first = true; + let mut remaining = val; + for (flag, s) in &flags { + let found = if EXACT_MATCH { + val == *flag + } else { + (val & flag) != 0 + }; + + if found { + if !first { + write!(out, "{delim}")?; + } + write!(out, "{s}")?; + + remaining &= !*flag; + first = false; + + if EXACT_MATCH && found { + break; + } + }; + } + if remaining != 0 { + if !first { + write!(out, "{delim}")?; + } + write!(out, "{remaining:#x}")?; + } + Ok(()) + }; + closure() + .expect("could not render symbolic values to string") + }, + env.scratch(), + ); + + // let f = crate::scratch::OwnedScratchBox_as_dyn!(writer, StringProducer); + // Ok(Value::Str(Str::new_owned("foo".into()))) + Ok(Value::Str(Str::new_procedural(writer))) + }); + Ok(eval) + }); + + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::Pointer(Box::new( + abi.char_typ(), + ))), + compiler, + }) + }, + ) + .parse(input) + }), + }) + } + + fn print_array_hex( + abi: Abi, + separator: &'static str, + ) -> Result { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut( + tuple(( + // Array to format + CGrammar::assignment_expr(), + lexeme(char(',')), + // Array size + CGrammar::assignment_expr(), + )), + |(val, _, array_size)| { + let compiler = Arc::new(move |cenv: &dyn CompileEnv<'_>, abi: &_| { + let cval = val.clone().compile(&cenv, abi)?; + let carray_size = array_size.clone().compile(&cenv, abi)?; + + let eval = new_dyn_evaluator(move |env: &_| { + let array_size = match carray_size.eval(env)? 
{ + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }?; + + macro_rules! print_array { + ($arr:expr) => {{ + let writer = OwnedScratchBox::new_in( + move |out: &mut dyn fmt::Write| { + let mut closure = || -> Result<(), fmt::Error> { + let mut first = true; + for x in $arr + .into_iter() + .take(array_size.try_into().unwrap()) + { + if !first { + out.write_str(separator)?; + } + write!(out, "{:02x}", x)?; + first = false; + } + Ok(()) + }; + closure() + .expect("could not render hex array to string") + }, + env.scratch(), + ); + + Ok(Value::Str(Str::new_procedural(writer))) + }}; + } + + match cval.eval(env)? { + Value::Raw(_, arr) => print_array!(arr.iter()), + Value::Str(s) => print_array!(s.bytes().chain([0])), + + Value::U8Array(arr) => print_array!(arr.iter()), + Value::I8Array(arr) => print_array!(arr.iter()), + + val => Err(EvalError::IllegalType(val.into_static().ok())), + } + }); + Ok(eval) + }); + + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::Pointer(Box::new( + abi.char_typ(), + ))), + compiler, + }) + }, + ) + .parse(input) + }), + }) + } + + fn object_like_macro( + typ: Type, + value: Value<'static>, + ) -> Result { + Ok(ExtensionMacroKind::ObjectLike { value, typ }) + } + + // TODO: support functions in https://elixir.free-electrons.com/linux/v6.6.9/source/include/uapi/linux/swab.h + match name { + "true" => object_like_macro(Type::Bool, Value::I64Scalar(1)), + "false" => object_like_macro(Type::Bool, Value::I64Scalar(0)), + "TASK_COMM_LEN" => object_like_macro(Type::I32, Value::I64Scalar(16)), + + "__builtin_constant_p" => Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map(many0(anychar), |_| ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::I32), + compiler: Arc::new(move |_, _| { + // As per the doc, 0 is an acceptable return value in any context: + // > A return of 0 does not 
indicate that the expression is not a + // > constant, but merely that GCC cannot prove it is a constant within + // > the constraints of the active set of optimization options. + // + // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#index-_005f_005fbuiltin_005fconstant_005fp + let eval = new_dyn_evaluator(move |_| Ok(Value::I64Scalar(0))); + Ok(eval) + }), + }) + .parse(input) + }), + }), + + "__print_ns_to_secs" => Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut(CGrammar::assignment_expr(), |expr| { + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::U64), + compiler: Arc::new(move |cenv: &dyn CompileEnv, abi: &_| { + let cexpr = expr.clone().compile(&cenv, abi)?; + + let eval = new_dyn_evaluator(move |env: &_| match cexpr.eval(env)? { + Value::U64Scalar(x) => Ok(Value::U64Scalar(x / 1_000_000_000)), + Value::I64Scalar(x) => Ok(Value::I64Scalar(x / 1_000_000_000)), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }); + Ok(eval) + }), + }) + }) + .parse(input) + }), + }), + "__print_ns_without_secs" => Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut(CGrammar::assignment_expr(), |expr| { + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::U32), + compiler: Arc::new(move |cenv: &dyn CompileEnv, abi: &_| { + let cexpr = expr.clone().compile(&cenv, abi)?; + + let eval = new_dyn_evaluator(move |env: &_| match cexpr.eval(env)? 
{ + Value::U64Scalar(x) => Ok(Value::U64Scalar(x % 1_000_000_000)), + Value::I64Scalar(x) => Ok(Value::I64Scalar(x % 1_000_000_000)), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }); + Ok(eval) + }), + }) + }) + .parse(input) + }), + }), + "__get_sockaddr" | "__get_sockaddr_rel" => Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut(CGrammar::identifier(), |field| { + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::AsExpr(Box::new(Expr::record_field( + field.clone(), + ))), + compiler: Arc::new(move |cenv: &dyn CompileEnv, abi: &_| { + let cexpr = Expr::record_field(field.clone()).compile(&cenv, abi)?; + + let eval = new_dyn_evaluator(move |env: &_| match cexpr.eval(env)? { + val @ Value::U8Array(_) => Ok(val), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }); + Ok(eval) + }), + }) + }) + .parse(input) + }), + }), + "__get_dynamic_array" | "__get_rel_dynamic_array" | "__get_str" | "__get_rel_str" => { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut(CGrammar::identifier(), |field| { + Ok(ExtensionMacroCallCompiler { + // The type of the array is the same as REC->field. We + // don't have access to a compilation environment here + // that would let us resolve that type immediately, but + // we can simply defer that to a later stage when all + // types are known. + ret_typ: ExtensionMacroCallType::AsExpr(Box::new(Expr::record_field( + field.clone(), + ))), + compiler: Arc::new(move |cenv: &dyn CompileEnv, abi: &_| { + // Compile "__get_str(field)" as "REC->field", since the compiler + // of REC->field will take care of getting the value and present + // it as an array already. 
+ Expr::record_field(field.clone()).compile(&cenv, abi) + }), + }) + }) + .parse(input) + }), + }) + } + "__get_dynamic_array_len" | "__get_rel_dynamic_array_len" => { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut(CGrammar::identifier(), |field| { + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(abi.long_typ()), + compiler: Arc::new(move |cenv: &dyn CompileEnv, abi: &_| { + let expr = Expr::record_field(field.clone()).compile(&cenv, abi)?; + Ok(new_dyn_evaluator(move |env: &_| { + match expr.eval(env)? { + Value::Raw(_, arr) => Ok(arr.len()), + Value::Bitmap(bitmap) => Ok(bitmap.len()), + Value::Str(s) => Ok(s.len() + 1), + + Value::U8Array(arr) => Ok(arr.len()), + Value::I8Array(arr) => Ok(arr.len()), + + Value::U16Array(arr) => Ok(arr.len() * 2), + Value::I16Array(arr) => Ok(arr.len() * 2), + + Value::U32Array(arr) => Ok(arr.len() * 4), + Value::I32Array(arr) => Ok(arr.len() * 4), + + Value::U64Array(arr) => Ok(arr.len() * 8), + Value::I64Array(arr) => Ok(arr.len() * 8), + + val => Err(EvalError::IllegalType(val.into_static().ok())), + } + .map(|size| Value::U64Scalar(size.try_into().unwrap())) + })) + }), + }) + }) + .parse(input) + }), + }) + } + "__get_bitmask" | "__get_rel_bitmask" | "__get_cpumask" | "__get_rel_cpumask" => { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut(CGrammar::identifier(), |field| { + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(abi.char_typ()), + compiler: Arc::new(move |cenv: &dyn CompileEnv, abi: &_| { + let bitmap = + Expr::record_field(field.clone()).compile(&cenv, abi)?; + let abi = abi.clone(); + Ok(new_dyn_evaluator(move |env: &_| { + macro_rules! 
to_string { + ($bitmap:expr) => {{ + let writer = OwnedScratchBox::new_in( + move |out: &mut dyn fmt::Write| { + write!(out, "{}", $bitmap) + .expect("Could not render bitmap to string") + }, + env.scratch(), + ); + + Ok(Value::Str(Str::new_procedural(writer))) + }}; + } + + let bitmap = bitmap.eval(env)?; + match bitmap { + Value::Bitmap(bitmap) => to_string!(bitmap), + + // Older kernels had cpumasks declared as a dynamic array of unsigned long: + // __data_loc unsigned long[] target_cpus; + // + // More recent kernels now declare it this way: + // __data_loc cpumask_t target_cpus + // + // Note that even recent kernels still use + // unsigned long[] for other bitmasks. + // + // https://bugzilla.kernel.org/show_bug.cgi?id=217447 + Value::U32Array(arr) + if abi.long_size == LongSize::Bits32 => + { + let abi = abi.clone(); + to_string!(Bitmap::from_bytes(cast_slice(&arr), &abi)) + } + Value::U64Array(arr) + if abi.long_size == LongSize::Bits64 => + { + let abi = abi.clone(); + to_string!(Bitmap::from_bytes(cast_slice(&arr), &abi)) + } + val => Err(EvalError::IllegalType(val.into_static().ok())), + } + })) + }), + }) + }) + .parse(input) + }), + }) + } + "__print_flags" | "__print_flags_u64" => print_symbolic::(abi), + "__print_symbolic" | "__print_symbolic_u64" => print_symbolic::(abi), + + "__print_hex" => print_array_hex(abi, " "), + "__print_hex_str" => print_array_hex(abi, ""), + "__print_array" => { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut( + tuple(( + // Array to format + CGrammar::assignment_expr(), + lexeme(char(',')), + // Array size + CGrammar::assignment_expr(), + lexeme(char(',')), + // Item size + CGrammar::assignment_expr(), + )), + |(val, _, array_size, _, item_size)| { + let compiler = Arc::new(move |cenv: &dyn CompileEnv<'_>, abi: &_| { + let cval = val.clone().compile(&cenv, abi)?; + let carray_size = array_size.clone().compile(&cenv, abi)?; + let citem_size = item_size.clone().compile(&cenv, abi)?; 
+ + let eval = new_dyn_evaluator(move |env: &_| { + let item_size: usize = match citem_size.eval(env)? { + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }? + .try_into() + .unwrap(); + + let array_size: usize = match carray_size.eval(env)? { + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }? + .try_into() + .unwrap(); + + macro_rules! print_array { + ($item_ty:ty, $arr:expr) => {{ + let item_size = core::mem::size_of::<$item_ty>(); + let writer = OwnedScratchBox::new_in( + move |out: &mut dyn fmt::Write| { + let mut closure = || { + write!(out, "[")?; + let mut first = true; + for x in $arr + .into_iter() + .take(array_size.try_into().unwrap()) + { + if !first { + write!(out, ",")?; + } + let x: $item_ty = x; + write!( + out, + "{:#0size$x}", + x, + size = 2 + item_size * 2 + )?; + first = false; + } + write!(out, "]") + }; + closure() + .expect("could not render array to string") + }, + env.scratch(), + ); + + (item_size, Ok(Value::Str(Str::new_procedural(writer)))) + }}; + } + + let (real_item_size, printed) = match cval.eval(env)? 
{ + Value::Raw(_, arr) => { + Ok(print_array!(u8, arr.iter().copied())) + } + Value::Bitmap(bitmap) => match &bitmap.chunk_size { + LongSize::Bits32 => Ok(print_array!( + u32, + bitmap.into_iter().as_chunks().map(|x| x + .try_into() + .expect("Chunk requires more than 32 bits")) + )), + LongSize::Bits64 => Ok(print_array!( + u64, + bitmap.into_iter().as_chunks() + )), + }, + Value::Str(s) => Ok(print_array!(u8, s.bytes().chain([0]))), + + Value::U8Array(arr) => { + Ok(print_array!(u8, arr.iter().copied())) + } + Value::I8Array(arr) => { + Ok(print_array!(i8, arr.iter().copied())) + } + + Value::U16Array(arr) => { + Ok(print_array!(u16, arr.iter().copied())) + } + Value::I16Array(arr) => { + Ok(print_array!(i16, arr.iter().copied())) + } + + Value::U32Array(arr) => { + Ok(print_array!(u32, arr.iter().copied())) + } + Value::I32Array(arr) => { + Ok(print_array!(i32, arr.iter().copied())) + } + + Value::U64Array(arr) => { + Ok(print_array!(u64, arr.iter().copied())) + } + Value::I64Array(arr) => { + Ok(print_array!(i64, arr.iter().copied())) + } + + val => Err(EvalError::IllegalType(val.into_static().ok())), + }?; + + if real_item_size == item_size { + printed + } else { + Err(EvalError::ExtensionMacroError { + call: "__print_array(...)".into(), + error: format!("Wrong size for array item. 
Expected {item_size} bytes, got {real_item_size} bytes") + }) + } + }); + Ok(eval) + }); + + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::Pointer(Box::new( + abi.char_typ(), + ))), + compiler, + }) + }, + ) + .parse(input) + }), + }) + } + "__print_hex_dump" => { + Ok(ExtensionMacroKind::FunctionLike { + parser: Box::new(move |input| { + map_res_cut( + tuple(( + // Prefix string + CGrammar::assignment_expr(), + lexeme(char(',')), + // Prefix type + CGrammar::assignment_expr(), + lexeme(char(',')), + // Row size + CGrammar::assignment_expr(), + lexeme(char(',')), + // Group size + CGrammar::assignment_expr(), + lexeme(char(',')), + // Buffer + CGrammar::assignment_expr(), + lexeme(char(',')), + // Length + CGrammar::assignment_expr(), + lexeme(char(',')), + // Ascii + CGrammar::assignment_expr(), + )), + |( + prefix_str, + _, + prefix_type, + _, + row_size, + _, + _group_size, + _, + buf, + _, + length, + _, + ascii, + )| { + let compiler = Arc::new(move |cenv: &dyn CompileEnv<'_>, abi: &Abi| { + let endianness = abi.endianness; + + let cprefix_str = prefix_str.clone().compile(&cenv, abi)?; + let cprefix_type = prefix_type.clone().compile(&cenv, abi)?; + let crow_size = row_size.clone().compile(&cenv, abi)?; + // Group size is ignored, as using any group size + // different than the underlying buffer type is + // undefined behavior. Therefore we can simply look at + // the kind of array we get at runtime and format it + // normally. + let cbuf = buf.clone().compile(&cenv, abi)?; + let clength = length.clone().compile(&cenv, abi)?; + let cascii = ascii.clone().compile(&cenv, abi)?; + + let eval = new_dyn_evaluator(move |env: &_| { + macro_rules! eval_int { + ($expr:expr) => {{ + let val: usize = match $expr.eval(env)? { + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())), + }? 
+ .try_into() + .unwrap(); + val + }}; + } + + let prefix_str: Option = match cprefix_str.eval(env)? { + Value::U64Scalar(0) => None, + Value::I64Scalar(0) => None, + val => { + let val = val.deref_ptr(env)?; + Some(match val.to_str() { + Some(s) => Ok(s.into()), + None => Err(EvalError::IllegalType(val.into_static().ok())), + }?) + } + }; + + let prefix_type = eval_int!(cprefix_type); + let row_size = eval_int!(crow_size); + + let length = eval_int!(clength); + let ascii = eval_int!(cascii) != 0; + + macro_rules! print_array { + ($item_ty:ty, $iter:expr) => {{ + let item_size = core::mem::size_of::<$item_ty>(); + let items_per_row = row_size / item_size; + let writer = OwnedScratchBox::new_in( + move |out: &mut dyn fmt::Write| { + let mut closure = || -> Result<(), fmt::Error> { + for (row_i, row) in $iter + .into_iter() + .take(length) + .chunks(items_per_row) + .into_iter() + .enumerate() + { + write!(out, "\n")?; + + // enum { + // DUMP_PREFIX_NONE, + // DUMP_PREFIX_ADDRESS, + // DUMP_PREFIX_OFFSET + // }; + match prefix_type { + // We cannot handle + // DUMP_PREFIX_ADDRESS + // since the address + // would be meaningless + // (the address inside + // the buffer), so we + // treat it as the + // offset. + 1 | 2 => write!( + out, + "{}{:#02x}: ", + prefix_str.as_deref().unwrap_or(""), + row_i * items_per_row + )?, + _ => match prefix_str.as_deref() { + Some(prefix_str) => write!(out, "{prefix_str}: ")?, + None => (), + } + } + + macro_rules! print_hex { + ($buf:expr) => {{ + for (i, x) in $buf.enumerate() { + // Ensure the type we got + // passed matches what we + // effectively get. 
+ let x: $item_ty = x; + if i != 0 { + write!(out, " ")?; + } + write!( + out, + "{:0size$x}", + x, + size = item_size * 2 + )?; + } + Ok(()) + }}; + } + + if ascii { + let mut vec = ScratchVec::with_capacity_in( + items_per_row, + env.scratch(), + ); + vec.extend(row); + + print_hex!(vec.iter().copied())?; + + write!(out, " ")?; + for item in vec { + let bytes = match endianness { + Endianness::Little => { + item.to_le_bytes() + } + Endianness::Big => { + item.to_be_bytes() + } + }; + for x in bytes { + let x = x as char; + write!( + out, + "{}", + if x.is_control() { + '.' + } else { + x + } + )?; + } + } + } else { + print_hex!(row)?; + } + } + Ok(()) + }; + closure().expect("could not render array to string") + }, + env.scratch(), + ); + + Ok(Value::Str(Str::new_procedural(writer))) + }}; + } + + match cbuf.eval(env)? { + Value::Bitmap(bitmap) => { + match &bitmap.chunk_size { + LongSize::Bits32 => print_array!(u32, bitmap.into_iter().as_chunks().map(|x| x.try_into().expect("Chunk requires more than 32 bits"))), + LongSize::Bits64 => print_array!(u64, bitmap.into_iter().as_chunks()), + } + } + Value::Raw(_, arr) => print_array!(u8, arr.iter().copied()), + Value::Str(s) => print_array!(u8, s.bytes().chain([0])), + + Value::U8Array(arr) => print_array!(u8, arr.iter().copied()), + Value::I8Array(arr) => print_array!(i8, arr.iter().copied()), + + Value::U16Array(arr) => print_array!(u16, arr.iter().copied()), + Value::I16Array(arr) => print_array!(i16, arr.iter().copied()), + + Value::U32Array(arr) => print_array!(u32, arr.iter().copied()), + Value::I32Array(arr) => print_array!(i32, arr.iter().copied()), + + Value::U64Array(arr) => print_array!(u64, arr.iter().copied()), + Value::I64Array(arr) => print_array!(i64, arr.iter().copied()), + + val => Err(EvalError::IllegalType(val.into_static().ok())), + } + }); + Ok(eval) + }); + + Ok(ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::Pointer(Box::new( + abi.char_typ(), + ))), + compiler, + }) 
                        },
                    )
                    .parse(input)
                }),
            })
        }
        // Any other identifier is not a pretty-printing extension macro we
        // know about.
        id => Err(CParseError::InvalidVariableIdentifier(id.into())),
    }
}

/// Context made available to every rule of the C grammar, carrying the ABI
/// description (long size, endianness, char type, ...) used to resolve
/// target-dependent types and to evaluate constant expressions.
#[derive(Clone)]
pub struct CGrammarCtx<'a> {
    pub abi: &'a Abi,
}

impl<'a> CGrammarCtx<'a> {
    /// Create a new grammar context for the given ABI.
    pub fn new(abi: &'a Abi) -> Self {
        CGrammarCtx { abi }
    }
}

/// Check whether the whole input is a single valid C identifier.
///
/// NOTE(review): the generic parameter list was lost in the paste; it is
/// reconstructed here from the `identifier::<_, ()>()` call below — confirm
/// against the original source.
#[inline]
pub fn is_identifier<I>(i: I) -> bool
where
    I: nom::AsBytes,
{
    // all_consuming() rejects trailing bytes after the identifier, so
    // e.g. "foo bar" is not accepted.
    all_consuming(recognize(identifier::<_, ()>()))(i.as_bytes())
        .finish()
        .is_ok()
}

// https://port70.net/~nsz/c/c11/n1570.html#6.4.2.1
/// Parser for a C identifier: a letter or underscore followed by any number
/// of letters, digits or underscores, converted into an [`Identifier`].
#[inline]
pub fn identifier<I, E>() -> impl nom::Parser<I, Identifier, E>
where
    I: nom::AsBytes
        + Clone
        + nom::InputTake
        + nom::Offset
        + nom::Slice<core::ops::RangeFrom<usize>>
        + nom::InputLength
        + nom::InputIter
        + nom::InputTakeAtPosition
        + for<'a> nom::Compare<&'a str>,
    <I as nom::InputIter>::Item: Clone + nom::AsChar,
    E: FromExternalError<I, CParseError> + nom::error::ParseError<I>,
{
    map_res_cut(
        lexeme(recognize(pair(
            alt((alpha1, tag("_"))),
            many0(alt((alphanumeric1, tag("_")))),
        ))),
        // For some reason rustc fails to infer CGrammar properly, even
        // though it looks like it should. Maybe this will be fixed in
        // the future.
        |s: I| {
            // The recognized bytes must be valid UTF-8 before they can be
            // turned into an owned identifier.
            from_utf8(s.as_bytes())
                .map_err(|err| CParseError::DecodeUtf8(err.to_string()))
                .map(|s| s.into())
        },
    )
}

/// Parser matching exactly the keyword `name`.
///
/// A whole identifier is consumed first and then compared against `name`,
/// so that e.g. `keyword("int")` cannot match the prefix of an identifier
/// such as `int_like_type`.
fn keyword<'a, I, E>(name: &'a str) -> impl nom::Parser<I, I, E> + 'a
where
    E: nom::error::ParseError<I> + 'a,
    I: nom::AsBytes
        + Clone
        + nom::InputTake
        + nom::Offset
        + nom::Slice<core::ops::RangeFrom<usize>>
        + nom::InputLength
        + nom::InputIter
        + nom::InputTakeAtPosition
        + for<'b> nom::Compare<&'b str>
        + 'a,
    <I as nom::InputIter>::Item: Clone + nom::AsChar,
    E: FromExternalError<I, CParseError> + nom::error::ParseError<I>,
{
    let mut inner = all_consuming(lexeme(tag(name)));
    let mut identifier = recognize(identifier());
    move |input: I| {
        // Parse one full identifier, then require it to be exactly the
        // expected keyword: all_consuming() forbids a longer identifier
        // that merely starts with the keyword.
        let (input, id) = identifier.parse(input)?;
        let (_, x) = inner.parse(id)?;
        Ok((input, x))
    }
}

/// Parser for a C escape sequence (the part following a backslash inside a
/// string or character literal), evaluated to the single byte it denotes.
fn escape_sequence<I, E>() -> impl nom::Parser<I, u8, E>
where
    I: Clone
        + AsBytes
        + nom::InputTake
        + nom::InputLength
        + nom::InputIter
        + nom::InputTakeAtPosition
        + nom::Slice<core::ops::RangeFrom<usize>>,
    E: FromExternalError<I, CParseError> + nom::error::ParseError<I> + nom::error::ContextError<I>,
{
    context(
        "escape sequence",
        preceded(
            char('\\'),
            alt((
                // Simple escape sequence
                alt((
                    char('"').map(|_| b'\"'),
                    char('\'').map(|_| b'\''),
                    char('\\').map(|_| b'\\'),
                    char('n').map(|_| b'\n'),
                    char('t').map(|_| b'\t'),
                    char('a').map(|_| 0x07u8),
                    char('b').map(|_| 0x08u8),
                    char('e').map(|_| 0x1Bu8),
                    char('f').map(|_| 0x0Cu8),
                    char('r').map(|_| 0x0Du8),
                    char('v').map(|_| 0x0Bu8),
                    char('?').map(|_| 0x3Fu8),
                )),
                // Hexadecimal escape sequence. The value must fit in a
                // byte, otherwise it is rejected with CharOutOfRange.
                preceded(
                    char('x'),
                    map_res_cut(hex_u64, |x| {
                        x.try_into().map_err(|_| CParseError::CharOutOfRange(x))
                    }),
                ),
                // Octal escape sequence: 1 to 3 octal digits.
                map_res_cut(
                    many_m_n(
                        1,
                        3,
                        alt((
                            char('0'),
                            char('1'),
                            char('2'),
                            char('3'),
                            char('4'),
                            char('5'),
                            char('6'),
                            char('7'),
                        )),
                    ),
                    |digits| {
                        // Accumulate the digits from least to most
                        // significant, multiplying the weight by 8 at each
                        // step, then check the result fits in a byte.
                        let zero: u64 = b'0'.into();
                        let digits = digits.into_iter().rev();
                        let mut x: u64 = 0;
                        let mut n = 1;
                        for digit in digits {
                            let digit: u64 = digit.into();
                            let digit: u64 = digit - zero;
                            x += digit * n;
                            n *= 8;
                        }
                        x.try_into().map_err(|_| CParseError::CharOutOfRange(x))
                    },
                ),
            )),
        ),
    )
}

/// Parser for a (possibly concatenated) C string literal, yielding
/// [`Expr::StringLiteral`].
///
/// Adjacent literals (`"foo" "bar"`) are concatenated as per the C
/// standard. Encoding prefixes (`u8`, `u`, `U`, `L`) are recognized but
/// rejected with [`CParseError::UnsupportedEncodingPrefix`].
pub fn string_literal<I, E>() -> impl nom::Parser<I, Expr, E>
where
    I: Clone
        + AsBytes
        + nom::InputTake
        + nom::Slice<core::ops::RangeFrom<usize>>
        + nom::InputLength
        + nom::InputIter
        + nom::InputTakeAtPosition
        + for<'a> nom::Compare<&'a str>,
    E: FromExternalError<I, CParseError> + nom::error::ContextError<I> + nom::error::ParseError<I>,
{
    many1(map_res_cut(
        tuple((
            context(
                "string encoding prefix",
                lexeme(opt(alt((tag("u8"), tag("u"), tag("U"), tag("L"))))),
            ),
            lexeme(delimited(
                char('"'),
                cut(context(
                    "string char sequence",
                    // Regrettably, type inference with map_res() breaks
                    // and we are forced to spell out the type of input,
                    // including a reference to a lifetime introduced
                    // inside the grammar!() macro.
                    |mut input: I| {
                        let mut string: String = String::new();
                        loop {
                            // Escape sequences are decoded one byte at a
                            // time.
                            let res = escape_sequence::<_, E>().parse(input.clone());
                            if let Ok((_input, c)) = res {
                                string.push(c.into());
                                input = _input;
                                continue;
                            }

                            // Parse runs of non-escaped chars
                            let res = is_not::<_, _, ()>(r#"\""#).parse(input.clone());
                            match res {
                                Ok((_input, s)) => {
                                    input = _input;
                                    let s = match from_utf8(s.as_bytes()) {
                                        Err(err) => {
                                            return error(
                                                input,
                                                CParseError::DecodeUtf8(err.to_string()),
                                            )
                                        }
                                        Ok(s) => s,
                                    };
                                    string.push_str(s);
                                    continue;
                                }
                                // Neither an escape nor a plain run: we
                                // reached the closing quote (or ill-formed
                                // input); stop and let the caller consume
                                // the '"'.
                                _ => return Ok((input, string)),
                            }
                        }
                    },
                )),
                char('"'),
            )),
        )),
        |(prefix, string)| match prefix {
            Some(_) => Err(CParseError::UnsupportedEncodingPrefix),
            None => Ok(string),
        },
    ))
    // String literal concatenation: "a" "b" is equivalent to "ab".
    .map(|seqs: Vec<String>| {
        let mut s = String::new();
        for seq in seqs {
            s.push_str(&seq);
        }
        Expr::StringLiteral(s)
    })
}

// CGrammar inspired by N1570 ISO C draft (latest C11 draft):
// https://port70.net/~nsz/c/c11/n1570.html#A.2.1
grammar!
{ + name: pub CGrammar, + ctx: CGrammarCtx<'i>, + error: CParseError, + rules: { + + // https://port70.net/~nsz/c/c11/n1570.html#6.4.2.1 + rule identifier() -> Identifier { + identifier() + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7.3p1 + rule type_qualifier() -> () { + alt(( + keyword("const"), + keyword("restrict"), + keyword("volatile"), + keyword("_Atomic"), + )).map(|_| ()) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7.6 + rule declarator(abstract_declarator: bool) -> Declarator { + lexeme( + alt(( + context( + "pointer", + preceded( + pair( + lexeme(char('*')), + many0(Self::type_qualifier()), + ), + Self::declarator(abstract_declarator), + ).map(|declarator| { + // Apply the declarator's modification last, since they have the least + // precedence (arrays) + let modify_typ = Rc::new(move |typ| (declarator.modify_typ)(Type::Pointer(Box::new(typ)))); + + Declarator { + modify_typ, + ..declarator + } + }) + ), + Self::direct_declarator(abstract_declarator), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7.6 + #[leftrec] rule direct_declarator(abstract_declarator: bool) -> Declarator { + let name = if abstract_declarator { + "abstract direct declarator" + } else { + "direct declarator" + }; + + let _id = || { + lexeme(Self::identifier().map(|id| Declarator { + identifier: Some(id), + modify_typ: Rc::new(|typ| typ), + })) + }; + let id = || { + move |input| { + if abstract_declarator { + alt(( + _id(), + success_with(|| Declarator { + identifier: None, + modify_typ: Rc::new(|typ| typ), + }), + )) + .parse(input) + } else { + _id().parse(input) + } + } + }; + + let paren = || { + context( + "parenthesized", + lexeme(parenthesized(Self::declarator(abstract_declarator))), + ) + }; + + let parameter_declaration = || lexeme(context( + "parameter declaration", + lexeme(pair( + Self::declaration_specifier(), + // We only have to deal with declarations containing only one + // declarator, i.e. 
we only handle "int foo" and not "int foo, bar;" + alt(( + Self::declarator(true), + Self::declarator(false), + )) + )).map(|(typ, declarator)| { + ParamDeclaration { + typ: (declarator.modify_typ)(typ), + identifier: declarator.identifier, + } + }), + )); + + let function = context( + "function", + pair( + Self::direct_declarator(abstract_declarator), + context( + "parameter list", + lexeme(parenthesized( + separated_list0( + lexeme(char(',')), + parameter_declaration(), + ) + )) + ) + ).map(|(declarator, params)| { + let modify_typ = Rc::new(move |typ| (declarator.modify_typ)( + Type::Function( + Box::new(typ), + params.clone() + ) + )); + Declarator { + modify_typ, + identifier: declarator.identifier, + } + }) + ); + + let array = context( + "array", + tuple(( + Self::grammar_ctx(), + Self::direct_declarator(abstract_declarator), + context( + "array size", + lexeme(delimited( + char('['), + preceded( + delimited( + opt(keyword("static")), + many0( + Self::type_qualifier() + ), + opt(keyword("static")), + ), + lexeme(opt(Self::assignment_expr())), + ), + char(']'), + )), + ), + )), + ).map( + |(ctx, declarator, array_size)| { + let array_size = match array_size { + Some(x) => { + match eval_unsigned(x, ctx.abi) { + Ok(0) => ArrayKind::ZeroLength, + Ok(x) => ArrayKind::Fixed(Ok(x)), + Err(err) => ArrayKind::Fixed(Err(Box::new(err))), + } + }, + None => ArrayKind::ZeroLength, + }; + let modify_typ = Rc::new(move |typ| (declarator.modify_typ)(Type::Array(Box::new(typ), array_size.clone()))); + Declarator { + modify_typ, + identifier: declarator.identifier, + } + } + ); + + lexeme( + context(name, + alt(( + array, function, paren(), id() + )) + ) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7 + rule declaration_specifier() -> Type { + lexeme(move |mut input| { + let abi = &Self::get_ctx(&input).abi; + let long_size = abi.long_size; + let char_typ = abi.char_typ(); + + #[derive(Debug, Clone, Copy)] + enum DeclSignedness { + Signed, + Unsigned, + // This 
is important to represent unknown signedness so that we can + // differentiate a lone "signed" from nothing, as a lone "signed" is + // equivalent to "signed int" + Unknown, + } + + #[derive(Debug, Clone)] + enum State { + Unknown(DeclSignedness), + Char(DeclSignedness), + Short(DeclSignedness), + Int(DeclSignedness), + Long(DeclSignedness), + LongLong(DeclSignedness), + } + + // Tokens that we simply discard as they don't impact layout or pretty + // representation + let discard_parser = || { + context( + "discarded", + many0_count( + alt(( + keyword("extern").map(|_| ()), + keyword("static").map(|_| ()), + keyword("auto").map(|_| ()), + keyword("register").map(|_| ()), + keyword("_Thread_local").map(|_| ()), + Self::type_qualifier(), + )) + ) + ) + }; + + // Parse the tokens using a state machine to deal with various + // combinations of "signed int" "int signed" "signed", "long unsigned + // long" etc. + let mut state = State::Unknown(DeclSignedness::Unknown); + + loop { + // tokens we simply ignore + (input, _) = discard_parser().parse(input)?; + + macro_rules! fsm { + ($($tag:expr => $transition:expr,)*) => { + |input| { + let (input, id) = Self::identifier().parse(input)?; + + match &*id { + $( + $tag => { + let res: Result = $transition; + Ok((input, res)) + } + ),* + _ => fail(input) + } + } + } + } + let res = lexeme::<_, _, (), _>(fsm! 
{ + "signed" => Ok(match &state { + State::Unknown(_) => State::Unknown(DeclSignedness::Signed), + State::Char(_) => State::Char(DeclSignedness::Signed), + State::Short(_) => State::Short(DeclSignedness::Signed), + State::Int(_) => State::Int(DeclSignedness::Signed), + State::Long(_) => State::Long(DeclSignedness::Signed), + State::LongLong(_) => State::LongLong(DeclSignedness::Signed), + }), + "unsigned" => Ok(match &state { + State::Unknown(_) => State::Unknown(DeclSignedness::Unsigned), + State::Char(_) => State::Char(DeclSignedness::Unsigned), + State::Short(_) => State::Short(DeclSignedness::Unsigned), + State::Int(_) => State::Int(DeclSignedness::Unsigned), + State::Long(_) => State::Long(DeclSignedness::Unsigned), + State::LongLong(_) => State::LongLong(DeclSignedness::Unsigned), + }), + + "char" => match &state { + State::Unknown(signedness) => Ok(State::Char(*signedness)), + x@State::Char(_) => Ok(x.clone()), + _ => Err(CParseError::InvalidTypeName), + }, + "short" => match &state { + State::Unknown(signedness) => Ok(State::Short(*signedness)), + State::Int(signedness) => Ok(State::Short(*signedness)), + _ => Err(CParseError::InvalidTypeName), + }, + "int" => match &state { + State::Unknown(signedness) => Ok(State::Int(*signedness)), + State::Char(_) => Err(CParseError::InvalidTypeName), + State::Int(_) => Err(CParseError::InvalidTypeName), + x => Ok(x.clone()), + }, + "long" => match &state { + State::Unknown(signedness) => Ok(State::Long(*signedness)), + State::Int(signedness) => Ok(State::Long(*signedness)), + State::Long(signedness) => Ok(State::LongLong(*signedness)), + _ => Err(CParseError::InvalidTypeName), + }, + }) + .parse(input.clone()); + + (input, state) = match res { + Ok((i, Ok(x))) => (i, x), + Ok((i, Err(err))) => return failure(i, err), + // We stop parsing when we can't recognize anything anymore. + // Either we hit something else (e.g. an "(" or "[") or we + // simply encountered a user-defined type. 
+ Err(_) => break, + } + } + let (input, typ) = match state { + // If we did not hit any of "int", "signed" etc, then it's just a + // user-defined type that we can consume now. + State::Unknown(DeclSignedness::Unknown) => lexeme(alt(( + context( + "struct", + preceded(keyword("struct"), Self::identifier()) + .map(Type::Struct), + ), + context( + "enum", + preceded(keyword("enum"), Self::identifier()) + .map(|id| Type::Enum(Box::new(Type::Unknown), id)), + ), + context( + "union", + preceded(keyword("union"), Self::identifier()) + .map(Type::Union), + ), + context( + "scalar", + Self::identifier().map(|id| { + match id.as_ref() { + "void" => Type::Void, + "_Bool" => Type::Bool, + _ => { + let typ = match id.as_ref() { + "caddr_t" => Type::Pointer(Box::new(char_typ.clone())), + "bool" => Type::Bool, + + "s8" | "__s8" | "int8_t" => Type::I8, + "u8" | "__u8" | "uint8_t" | "u_char" | "unchar" | "u_int8_t" => Type::U8, + + "s16" | "__s16" | "int16_t" => Type::I16, + "u16" | "__u16" | "uint16_t" | "u_short" | "ushort" | "u_int16_t" | "__le16" | "__be16" | "__sum16" => Type::U16, + + "s32" | "__s32" | "int32_t" => Type::I32, + "u32" | "__u32" | "uint32_t" | "u_int" | "uint" | "u_int32_t" | "gfp_t" | "slab_flags_t" | "fmode_t" | "OM_uint32" | "dev_t" | "nlink_t" | "__le32" | "__be32" | "__wsum" | "__poll_t" => Type::U32, + + "s64" | "__s64" | "int64_t" | "loff_t" => Type::I64, + "u64" | "__u64" | "uint64_t" | "u_int64_t" | "sector_t" | "blkcnt_t" | "__le64" | "__be64" => Type::U64, + + "pid_t" => Type::I32, + + "u_long" | "ulong" | "off_t" | "ssize_t" | "ptrdiff_t" | "clock_t" | "irq_hw_number_t" => match long_size { + LongSize::Bits32 => Type::I32, + LongSize::Bits64 => Type::I64, + }, + + "uintptr_t" | "size_t" => match long_size { + LongSize::Bits32 => Type::U32, + LongSize::Bits64 => Type::U64, + }, + + _ => Type::Unknown, + }; + Type::Typedef(Box::new(typ), id) + } + } + }), + ), + ))) + .parse(input), + + // "signed" alone is equivalent to "signed int" + 
State::Unknown(DeclSignedness::Signed) => Ok((input, Type::I32)), + State::Unknown(DeclSignedness::Unsigned) => Ok((input, Type::U32)), + + State::Char(DeclSignedness::Signed) => Ok((input, Type::I8)), + State::Char(DeclSignedness::Unsigned) => Ok((input, Type::U8)), + + State::Char(DeclSignedness::Unknown) => Ok((input, char_typ)), + + State::Short(DeclSignedness::Signed | DeclSignedness::Unknown) => { + Ok((input, Type::I16)) + } + State::Short(DeclSignedness::Unsigned) => Ok((input, Type::U16)), + + State::Int(DeclSignedness::Signed | DeclSignedness::Unknown) => { + Ok((input, Type::I32)) + } + State::Int(DeclSignedness::Unsigned) => Ok((input, Type::U32)), + + State::Long(DeclSignedness::Signed | DeclSignedness::Unknown) => Ok(( + input, + match long_size { + LongSize::Bits32 => Type::I32, + LongSize::Bits64 => Type::I64, + }, + )), + State::Long(DeclSignedness::Unsigned) => Ok(( + input, + match long_size { + LongSize::Bits32 => Type::U32, + LongSize::Bits64 => Type::U64, + }, + )), + + State::LongLong(DeclSignedness::Signed | DeclSignedness::Unknown) => { + Ok((input, Type::I64)) + } + State::LongLong(DeclSignedness::Unsigned) => Ok((input, Type::U64)), + }?; + + let (input, _) = discard_parser().parse(input)?; + Ok((input, typ)) + }) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7 + rule declaration() -> Declaration { + // Parser for ISO C + let iso = context( + "iso C declaration", + map_res_cut( + lexeme(pair( + Self::declaration_specifier(), + // We only have to deal with declarations containing only one + // declarator, i.e. 
we only handle "int foo" and not "int foo, bar;" + Self::declarator(false), + )), + |(typ, declarator)| { + let typ = (declarator.modify_typ)(typ); + match declarator.identifier { + Some(identifier) => { + Ok(Declaration { + identifier, + typ + }) + }, + None => Err(CParseError::DeclarationWithoutIdentifier(typ)) + } + }, + ), + ); + + // Invalid C syntax that ftrace outputs for its __data_loc and __rel_loc, e.g.: + // __data_loc char[] name + let data_loc = context( + "__data_loc declaration", + lexeme( + // Once we consumed "__data_loc", we don't want to allow + // backtracking back to the ISO C declaration, as we know it will + // never yield something sensible. + map_res_cut( + tuple(( + alt(( + keyword("__data_loc"), + keyword("__rel_loc"), + )), + Self::declaration_specifier(), + // This will be an abstract declarator, i.e. a declarator with + // no identifier (like parameters in a function prototype), as + // the name comes after the last "[]" + Self::declarator(true), + opt(context( + "__data_loc identifier", + lexeme(Self::identifier()), + )), + )), + |(kind, typ, abstract_declarator, identifier)| { + // Push the array sizes down the stack. The 2nd nested array takes the size of the 1st etc. + fn push_array_size( + typ: Type, + kind: ArrayKind, + ) -> Result<(bool, Type), CParseError> { + match typ { + Type::Array(_, ArrayKind::Dynamic(_)) | Type::DynamicScalar(..) => { + Err(CParseError::NestedDataLoc(typ)) + } + Type::Array(typ, kind_) => Ok(( + true, + Type::Array(Box::new(push_array_size(*typ, kind_)?.1), kind), + )), + Type::Pointer(typ) => { + let (_, typ) = push_array_size(*typ, kind)?; + // If an array is behind a pointer, it can be ignored. + Ok((false, Type::Pointer(Box::new(typ)))) + } + _ => Ok((false, typ)), + } + } + + let typ = (abstract_declarator.modify_typ)(typ); + + // Remove the inner array, which is corresponding to the last "[]" + // parsed. 
It only acts as a separator between the type specifier + // and the identifier, and actually corresponds to a top-level + // dynamic array. + + // The innermost array is a fixed "[]" that is part of the format. + // It is actually acting as a top-level array, so we push the array + // sizes down the stack and replace the top-level by a dynamic array. + let array_kind = match *kind.deref() { + b"__data_loc" => ArrayKind::Dynamic(DynamicKind::Dynamic), + b"__rel_loc" => ArrayKind::Dynamic(DynamicKind::DynamicRel), + _ => panic!("Unknown dynamic kind: {:?}", kind.deref()), + }; + let (is_array, pushed_typ) = push_array_size(typ.clone(), array_kind)?; + + let typ = if is_array { + match pushed_typ { + typ@Type::Array(..) => Ok(typ), + typ => Err(CParseError::DataLocArrayNotArray(typ)) + } + } else { + let scalar_kind = match *kind.deref() { + b"__data_loc" => DynamicKind::Dynamic, + b"__rel_loc" => DynamicKind::DynamicRel, + _ => panic!("Unknown dynamic kind: {:?}", kind.deref()), + }; + Ok(Type::DynamicScalar(Box::new(typ), scalar_kind)) + }?; + + let identifier = if is_array { + match identifier { + None => return Err(CParseError::DataLocArrayWithoutIdentifier(typ)), + Some(id) => id, + } + } else { + match (abstract_declarator.identifier, identifier) { + (Some(id), None) => id, + (None, Some(id)) => id, + (None, None) => { + return Err(CParseError::DataLocWithoutIdentifier); + } + (Some(id1), Some(id2)) => { + return Err(CParseError::DataLocAmbiguousIdentifier(id1, id2)); + }, + } + }; + + Ok(Declaration { + identifier, + typ + }) + }, + ), + ), + ); + + let parser = alt((data_loc, iso)); + context("declaration", lexeme(parser)) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7.9p1 + rule initializer() -> Expr { + lexeme( + alt(( + delimited( + lexeme(char('{')), + cut(Self::initializer_list().map(Expr::InitializerList)), + preceded( + lexeme(opt(char(','))), + lexeme(char('}')), + ) + ), + Self::assignment_expr(), + )) + ) + } + + // 
https://port70.net/~nsz/c/c11/n1570.html#6.7.9p1 + rule initializer_list() -> Vec { + + enum DesignatorKind { + Subscript(Expr), + Member(Identifier), + } + let designator = || { + lexeme( + alt(( + delimited( + lexeme(char('[')), + cut(Self::constant_expr()), + lexeme(char(']')), + ).map(DesignatorKind::Subscript), + preceded( + lexeme(char('.')), + cut(Self::identifier()), + ).map(DesignatorKind::Member), + )).map(|kind| { + move |parent| match kind { + DesignatorKind::Subscript(expr) => Expr::Subscript(Box::new(parent), Box::new(expr)), + DesignatorKind::Member(id) => Expr::MemberAccess(Box::new(parent), id), + } + }) + ) + }; + + lexeme( + separated_list1( + lexeme(char(',')), + alt(( + separated_pair( + fold_many1( + designator(), + || Expr::Uninit, + |parent, combine| combine(parent) + ), + lexeme(char('=')), + cut(Self::initializer()), + ).map(|(designation, expr)| Expr::DesignatedInitializer(Box::new(designation), Box::new(expr))), + Self::initializer() + )), + ) + ) + } + + rule balanced_paren() -> Span<'i, CGrammar> { + lexeme(delimited( + char('('), + |input: Span<'i, Self> | { + let mut i = 0; + let mut level: usize = 1; + + + enum EscapingState { + Escaped, + Normal, + } + enum LiteralKind { + Str, + Char, + } + enum State { + Literal(LiteralKind, EscapingState), + Normal, + } + let mut state = State::Normal; + + for (i_, c) in input.as_bytes().iter().copied().enumerate() { + i = i_; + + match c { + // We do not match parenthesis inside string literals + b'"' => { + state = match state { + State::Normal => State::Literal(LiteralKind::Str, EscapingState::Normal), + State::Literal(LiteralKind::Str, EscapingState::Normal) => State::Normal, + State::Literal(LiteralKind::Str, EscapingState::Escaped) => State::Literal(LiteralKind::Str, EscapingState::Normal), + state@State::Literal(_, _) => state, + }; + } + // We do not match parenthesis inside char literals + b'\'' => { + state = match state { + State::Normal => State::Literal(LiteralKind::Char, 
EscapingState::Normal), + State::Literal(LiteralKind::Char, EscapingState::Normal) => State::Normal, + State::Literal(LiteralKind::Char, EscapingState::Escaped) => State::Literal(LiteralKind::Char, EscapingState::Normal), + state@State::Literal(_, _) => state, + }; + } + b'\\' => { + state = match state { + State::Normal => State::Normal, + State::Literal(kind, EscapingState::Normal) => State::Literal(kind, EscapingState::Escaped), + State::Literal(kind, EscapingState::Escaped) => State::Literal(kind, EscapingState::Normal), + }; + } + b'(' if matches!(state, State::Normal) => {level += 1;}, + b')' if matches!(state, State::Normal) => {level -= 1;}, + _ => () + } + if level == 0 { + break + } + } + take(i).parse(input) + }, + char(')'), + )) + } + + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.2p1 + #[leftrec] rule postfix_expr() -> Expr { + lexeme( + alt(( + context("postinc expr", + terminated( + Self::postfix_expr(), + lexeme(tag("++")), + ).map(|expr| Expr::PostInc(Box::new(expr))) + ), + context("postdec expr", + terminated( + Self::postfix_expr(), + lexeme(tag("--")), + ).map(|expr| Expr::PostDec(Box::new(expr))) + ), + context("subscript expr", + tuple(( + Self::postfix_expr(), + delimited( + lexeme(char('[')), + cut(Self::expr()), + lexeme(char(']')), + ), + )).map(|(array, index)| Expr::Subscript(Box::new(array), Box::new(index))) + ), + context("__builtin_expect", + preceded( + keyword("__builtin_expect"), + parenthesized( + Self::assignment_expr(), + ) + ).map(|expr| expr) + ), + context("func call expr", + move |input| { + let (input, f) = Self::postfix_expr().parse(input)?; + let abi = &Self::get_ctx(&input).abi; + let f = f.simplify(&EmptyEnv::new(), abi); + match f { + Expr::ExtensionMacro(desc) => { + context( + "extension function args", + lexeme(|input: Span<'i, Self>| { + match &desc.kind { + ExtensionMacroKind::FunctionLike{parser} => { + let desc = Arc::clone(&desc); + + let mut parser = move |input| { + let (input, args) = 
Self::balanced_paren().parse(input)?; + let saved_args = (*args.deref()).to_vec(); + let (_, compiler) = all_consuming(parser.deref()).parse(args)?; + + Ok((input, Expr::ExtensionMacroCall( + ExtensionMacroCall { + args: saved_args, + desc: Arc::clone(&desc), + compiler, + } + ))) + }; + + match parser.parse(input.clone()).finish() { + Err(err) => { + let err = match err.data { + None => FromParseError::from_parse_error(input.clone(), &err.inner), + Some(err) => err, + }; + failure(input, err) + } + Ok((input, call)) => Ok((input, call)), + } + } + ExtensionMacroKind::ObjectLike{value: _, typ: _} => { + error(input, CParseError::CannotCallObjectLikeMacro(desc.name.clone())) + }, + } + }) + ).parse(input) + } + _ => { + let (input, args) = parenthesized( + separated_list0( + lexeme(char(',')), + Self::assignment_expr() + ) + ).parse(input)?; + + Ok((input, Expr::FuncCall(Box::new(f), args))) + } + } + } + ), + context("member access expr", + separated_pair( + Self::postfix_expr(), + lexeme(char('.')), + cut(Self::identifier()), + ).map(|(value, member)| Expr::MemberAccess(Box::new(value), member)) + ), + context("deref member access expr", + separated_pair( + Self::postfix_expr(), + lexeme(tag("->")), + cut(Self::identifier()), + ).map(|(value, member)| Expr::MemberAccess(Box::new(Expr::Deref(Box::new(value))), member)) + ), + + context("compound literal", + tuple(( + parenthesized( + Self::type_name(), + ), + delimited( + lexeme(char('{')), + cut(Self::initializer_list()), + preceded( + lexeme(opt(char(','))), + lexeme(char('}')), + ) + ), + )).map(|(typ, init)| Expr::CompoundLiteral(typ, init)) + ), + Self::primary_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.5 + #[leftrec] rule multiplicative_expr() -> Expr { + lexeme( + alt(( + context("* expr", + separated_pair( + Self::multiplicative_expr(), + lexeme(char('*')), + Self::cast_expr(), + ).map(|(lop, rop)| Expr::Mul(Box::new(lop), Box::new(rop))) + ), + context("/ expr", + 
separated_pair( + Self::multiplicative_expr(), + lexeme(char('/')), + cut(Self::cast_expr()), + ).map(|(lop, rop)| Expr::Div(Box::new(lop), Box::new(rop))) + ), + context("% expr", + separated_pair( + Self::multiplicative_expr(), + lexeme(char('%')), + Self::cast_expr(), + ).map(|(lop, rop)| Expr::Mod(Box::new(lop), Box::new(rop))) + ), + Self::cast_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.6 + #[leftrec] rule additive_expr() -> Expr { + lexeme( + alt(( + context("+ expr", + separated_pair( + Self::additive_expr(), + lexeme(char('+')), + Self::multiplicative_expr(), + ).map(|(lop, rop)| Expr::Add(Box::new(lop), Box::new(rop))) + ), + context("- expr", + separated_pair( + Self::additive_expr(), + lexeme(char('-')), + Self::multiplicative_expr(), + ).map(|(lop, rop)| Expr::Sub(Box::new(lop), Box::new(rop))) + ), + Self::multiplicative_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.7 + #[leftrec] rule shift_expr() -> Expr { + lexeme( + alt(( + context("<< expr", + separated_pair( + Self::shift_expr(), + lexeme(tag("<<")), + Self::additive_expr(), + ).map(|(lop, rop)| Expr::LShift(Box::new(lop), Box::new(rop))) + ), + context(">> expr", + separated_pair( + Self::shift_expr(), + lexeme(tag(">>")), + Self::additive_expr(), + ).map(|(lop, rop)| Expr::RShift(Box::new(lop), Box::new(rop))) + ), + Self::additive_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.8 + #[leftrec] rule relational_expr() -> Expr { + lexeme( + alt(( + context("<= expr", + separated_pair( + Self::relational_expr(), + lexeme(tag("<=")), + cut(Self::shift_expr()), + ).map(|(lop, rop)| Expr::LoEq(Box::new(lop), Box::new(rop))) + ), + context(">= expr", + separated_pair( + Self::relational_expr(), + lexeme(tag(">=")), + cut(Self::shift_expr()), + ).map(|(lop, rop)| Expr::HiEq(Box::new(lop), Box::new(rop))) + ), + context("< expr", + separated_pair( + Self::relational_expr(), + lexeme(char('<')), + Self::shift_expr(), + 
).map(|(lop, rop)| Expr::Lo(Box::new(lop), Box::new(rop))) + ), + context("> expr", + separated_pair( + Self::relational_expr(), + lexeme(char('>')), + Self::shift_expr(), + ).map(|(lop, rop)| Expr::Hi(Box::new(lop), Box::new(rop))) + ), + Self::shift_expr(), + )) + ) + } + + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.9 + #[leftrec] rule equality_expr() -> Expr { + lexeme( + alt(( + context("== expr", + separated_pair( + Self::equality_expr(), + lexeme(tag("==")), + cut(Self::relational_expr()), + ).map(|(lop, rop)| Expr::Eq(Box::new(lop), Box::new(rop))) + ), + context("!=", + separated_pair( + Self::equality_expr(), + lexeme(tag("!=")), + cut(Self::relational_expr()), + ).map(|(lop, rop)| Expr::NEq(Box::new(lop), Box::new(rop))) + ), + Self::relational_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.10 + #[leftrec] rule and_expr() -> Expr { + lexeme( + alt(( + context("& expr", + separated_pair( + Self::and_expr(), + // Avoid recognizing "A && B" as "A & (&B)" + lexeme(terminated(char('&'), not(char('&')))), + Self::equality_expr(), + ).map(|(lop, rop)| Expr::BitAnd(Box::new(lop), Box::new(rop))) + ), + Self::equality_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.11 + #[leftrec] rule exclusive_or_expr() -> Expr { + lexeme( + alt(( + context("^ expr", + separated_pair( + Self::exclusive_or_expr(), + lexeme(char('^')), + Self::and_expr(), + ).map(|(lop, rop)| Expr::BitXor(Box::new(lop), Box::new(rop))) + ), + Self::and_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.12 + #[leftrec] rule inclusive_or_expr() -> Expr { + lexeme( + alt(( + context("| expr", + separated_pair( + Self::inclusive_or_expr(), + lexeme(char('|')), + Self::exclusive_or_expr(), + ).map(|(lop, rop)| Expr::BitOr(Box::new(lop), Box::new(rop))) + ), + Self::exclusive_or_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.13 + #[leftrec] rule logical_and_expr() -> Expr { + lexeme( + alt(( + 
context("&& expr", + separated_pair( + Self::logical_and_expr(), + lexeme(tag("&&")), + Self::inclusive_or_expr(), + ).map(|(lop, rop)| Expr::And(Box::new(lop), Box::new(rop))) + ), + Self::inclusive_or_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.14 + #[leftrec] rule logical_or_expr() -> Expr { + lexeme( + alt(( + context("|| expr", + separated_pair( + Self::logical_or_expr(), + lexeme(tag("||")), + cut(Self::logical_and_expr()), + ).map(|(lop, rop)| Expr::Or(Box::new(lop), Box::new(rop))) + ), + Self::logical_and_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.3p1 + rule conditional_expr() -> Expr { + lexeme( + alt(( + context("ternary expr", + separated_pair( + context("ternary cond expr", + Self::logical_or_expr() + ), + lexeme(char('?')), + cut(separated_pair( + context("ternary true expr", + opt(Self::expr()) + ), + lexeme(char(':')), + context("ternary false expr", + cut(Self::conditional_expr()) + ), + )), + ).map(|(cond, (true_, false_))| { + // GNU extension allows "cond ?: false_" that is equivalent to + // "cond ? cond : false_". The only difference is that side effects of + // evaluating "cond" are not repeated, but it does not matter to our + // interpreter since we don't support side effects anyway. 
+ let true_ = true_.unwrap_or(cond.clone()); + Expr::Ternary(Box::new(cond), Box::new(true_), Box::new(false_)) + }) + ), + Self::logical_or_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7.6 + rule constant_expr() -> Expr { + Self::conditional_expr() + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.3p1 + rule assignment_expr() -> Expr { + lexeme( + alt(( + context("assignment", + tuple(( + Self::unary_expr(), + lexeme(alt(( + tuple(( + tag("="), + Self::assignment_expr(), + )), + tuple(( + alt(( + tag("*="), + tag("/="), + tag("%="), + tag("+="), + tag("-="), + tag("<<="), + tag(">>="), + tag("&="), + tag("^="), + tag("|="), + )), + cut(Self::assignment_expr()), + )) + ))) + )).map(|(lexpr, (op, rexpr))| { + use Expr::*; + match &op.fragment()[..] { + b"=" => Assign(Box::new(lexpr), Box::new(rexpr)), + b"*=" => Assign(Box::new(lexpr.clone()), Box::new(Mul(Box::new(lexpr), Box::new(rexpr)))), + b"/=" => Assign(Box::new(lexpr.clone()), Box::new(Div(Box::new(lexpr), Box::new(rexpr)))), + b"%=" => Assign(Box::new(lexpr.clone()), Box::new(Mod(Box::new(lexpr), Box::new(rexpr)))), + b"+=" => Assign(Box::new(lexpr.clone()), Box::new(Add(Box::new(lexpr), Box::new(rexpr)))), + b"-=" => Assign(Box::new(lexpr.clone()), Box::new(Sub(Box::new(lexpr), Box::new(rexpr)))), + b"<<=" => Assign(Box::new(lexpr.clone()), Box::new(LShift(Box::new(lexpr), Box::new(rexpr)))), + b">>=" => Assign(Box::new(lexpr.clone()), Box::new(RShift(Box::new(lexpr), Box::new(rexpr)))), + b"&=" => Assign(Box::new(lexpr.clone()), Box::new(BitAnd(Box::new(lexpr), Box::new(rexpr)))), + b"^=" => Assign(Box::new(lexpr.clone()), Box::new(BitXor(Box::new(lexpr), Box::new(rexpr)))), + b"|=" => Assign(Box::new(lexpr.clone()), Box::new(BitOr(Box::new(lexpr), Box::new(rexpr)))), + _ => panic!("unhandled assignment operator") + } + }) + ), + Self::conditional_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.3p1 + #[leftrec] rule unary_expr() -> Expr { + lexeme( 
+ alt(( + context("preinc expr", + preceded( + lexeme(tag("++")), + cut(Self::unary_expr()), + ).map(|e| Expr::PreInc(Box::new(e))) + ), + context("predec expr", + preceded( + lexeme(tag("--")), + cut(Self::unary_expr()), + ).map(|e| Expr::PreDec(Box::new(e))) + ), + context("sizeof type", + preceded( + keyword("sizeof"), + parenthesized( + Self::type_name(), + ) + ).map(Expr::SizeofType) + ), + context("sizeof expr", + preceded( + keyword("sizeof"), + Self::unary_expr(), + ).map(|e| Expr::SizeofExpr(Box::new(e))) + ), + context("unary op expr", + tuple(( + lexeme( + // https://port70.net/~nsz/c/c11/n1570.html#6.5.3p1 + alt(( + context("unary &", char('&').map(|_| Box::new(|e| Expr::Addr(Box::new(e))) as Box _>)), + context("unary *", char('*').map(|_| Box::new(|e| Expr::Deref(Box::new(e))) as Box _>)), + context("unary +", char('+').map(|_| Box::new(|e| Expr::Plus(Box::new(e))) as Box _>)), + context("unary -", char('-').map(|_| Box::new(|e| Expr::Minus(Box::new(e))) as Box _>)), + context("unary ~", char('~').map(|_| Box::new(|e| Expr::Tilde(Box::new(e))) as Box _>)), + context("unary !", char('!').map(|_| Box::new(|e| Expr::Bang(Box::new(e))) as Box _>)), + )) + ), + Self::cast_expr(), + )).map(|(modify, e)| modify(e)) + ), + Self::postfix_expr(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.7.7p1 + rule type_name() -> Type { + lexeme( + alt(( + map_res_cut( + pair( + Self::grammar_ctx(), + preceded( + keyword("__typeof__"), + cut(parenthesized( + Self::expr(), + )) + ) + ), + |(ctx, expr)| { + expr.typ( &EmptyEnv::new(), ctx.abi).map_err(|_| CParseError::CouldNotGuessType(expr)) + } + ), + tuple(( + Self::declaration_specifier(), + Self::declarator(true), + )).map(|(typ, abstract_declarator)| + (abstract_declarator.modify_typ)(typ) + ) + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.4p1 + rule cast_expr() -> Expr { + lexeme( + alt(( + context("cast expr", + tuple(( + parenthesized( + Self::type_name(), + ), + 
Self::cast_expr(), + )).map(|(typ, e)| Expr::Cast(typ, Box::new(e))) + ), + Self::unary_expr(), + )) + ) + } + + rule escape_sequence() -> u8 { + escape_sequence() + } + + + // https://port70.net/~nsz/c/c11/n1570.html#6.4.4.4 + rule char_constant() -> Expr { + lexeme( + context("character constant", + map_res_cut( + tuple(( + context( + "char encoding prefix", + opt(alt((keyword("u8"), keyword("u"), keyword("U"), keyword("L")))), + ), + delimited( + char('\''), + cut(alt(( + Self::escape_sequence(), + u8, + ))), + char('\''), + ) + )), + |(prefix, c)| match prefix { + Some(_) => Err(CParseError::UnsupportedEncodingPrefix), + None => Ok(Expr::CharConstant(Type::I32, c.into())), + }, + ) + ) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.4.5 + rule string_literal() -> Expr { + string_literal() + } + + rule int_constant() -> Expr { + enum Kind { + Dec, + Hex, + Oct, + } + let value = || alt(( + preceded( + char('0'), + alt(( + context("octal constant", + dec_u64.map(|mut x| ( + Kind::Oct, + { + let mut val = 0; + for i in 0.. 
{ + let rem = x % 10; + val += rem * 8_u64.pow(i); + x /= 10; + if x == 0 { + break + } + } + val + } + )) + ), + context("hex constant", + preceded( + alt((char('x'), char('X'))), + cut(hex_u64.map(|x| (Kind::Hex, x))), + ) + ), + )) + ), + context("decimal constant", dec_u64.map(|x| (Kind::Dec, x))), + )); + + enum ConstantSignedness { + Unsigned, + } + enum Longness { + Long, + LongLong, + } + + let unsigned_suffix = || alt((char('u'), char('U'))).map(|_| ConstantSignedness::Unsigned); + let long_suffix = || alt(( + tag("ll").map(|_| Longness::LongLong), + tag("LL").map(|_| Longness::LongLong), + tag("l").map(|_| Longness::Long), + tag("L").map(|_| Longness::Long), + )); + + let suffix = || alt(( + pair( + opt(unsigned_suffix()), + opt(long_suffix()), + ), + pair( + opt(long_suffix()), + opt(unsigned_suffix()), + ).map(|(x, y)| (y, x)), + )); + + + lexeme( + map_res_cut( + tuple(( + Self::grammar_ctx(), + value(), + suffix(), + )), + move |(ctx, (kind, x), suffix)| { + let abi = &ctx.abi; + let long_size = abi.long_size; + + macro_rules! max_val { + (U32) => {u32::MAX.into()}; + (U64) => {u64::MAX.into()}; + + (I32) => {i32::MAX.try_into().unwrap()}; + (I64) => {i64::MAX.try_into().unwrap()}; + (Long) => { + match long_size { + LongSize::Bits32 => max_val!(I32), + LongSize::Bits64 => max_val!(I64), + } + }; + (ULong) => { + match long_size { + LongSize::Bits32 => max_val!(U32), + LongSize::Bits64 => max_val!(U64), + } + }; + } + + + let (ulong_ctype, long_ctype) = match long_size { + LongSize::Bits32 => (Type::U32, Type::I32), + LongSize::Bits64 => (Type::U64, Type::I64), + }; + macro_rules! c_typ { + (Long) => {long_ctype.clone()}; + (ULong) => {ulong_ctype.clone()}; + ($typ:ident) => {Type::$typ}; + } + + macro_rules! 
guess_typ { + ($x:ident, $($typ:ident),*) => { + match $x { + $( + x if x <= max_val!($typ) => Ok(c_typ!($typ)), + )* + x => Err(CParseError::InvalidIntegerConstant(x)), + } + } + } + + use Longness::*; + use ConstantSignedness::Unsigned; + + // Encodes this table: + // https://port70.net/~nsz/c/c11/n1570.html#6.4.4.1p5 + let typ = match (kind, suffix) { + (Kind::Dec, (None, None)) => guess_typ!(x, I32, I64), + (Kind::Dec, (Some(Unsigned), None)) => guess_typ!(x, U32, U64), + + (Kind::Dec, (None, Some(Long))) => guess_typ!(x, Long, I64), + (Kind::Dec, (Some(Unsigned), Some(Long))) => guess_typ!(x, ULong, U64), + + (Kind::Dec, (None, Some(LongLong))) => guess_typ!(x, I64), + (Kind::Dec, (Some(Unsigned), Some(LongLong))) => guess_typ!(x, U64), + + // Oct and Hex constant have different type inference rules + (_, (None, None)) => guess_typ!(x, I32, U32, Long, ULong, I64, U64), + (_, (Some(Unsigned), None)) => guess_typ!(x, U32, U64), + + (_, (None, Some(Long))) => guess_typ!(x, Long, ULong, I64, U64), + (_, (Some(Unsigned), Some(Long))) => guess_typ!(x, ULong, U64), + + (_, (None, Some(LongLong))) => guess_typ!(x, I64, U64), + (_, (Some(Unsigned), Some(LongLong))) => guess_typ!(x, U64), + }?; + Ok(Expr::IntConstant(typ, x)) + } + ) + ) + } + + rule enum_constant() -> Expr { + lexeme(context("enum constant", + Self::identifier().map(|id| Expr::EnumConstant(Type::I32, id)) + )) + } + + rule constant() -> Expr { + context("constant", + alt(( + Self::char_constant(), + Self::int_constant(), + Self::enum_constant(), + )) + ) + } + + // https://port70.net/~nsz/c/c11/n1570.html#6.5.1p1 + rule primary_expr() -> Expr + { + lexeme( + alt(( + parenthesized( + Self::expr() + ), + Self::string_literal(), + map_res( + pair( + Self::grammar_ctx(), + Self::identifier(), + ), + |(ctx, id)| match id.deref() { + "REC" => { + // Make a REC variable and then take its + // address, rather than making a pointer-typed + // variable. 
This will allow the interpreter to + // simplify "REC->x" as it will see "(&REC)->x" + // that will get turned into "(*&REC).x" and + // then "REC.x". Doing it this way plays nicely + // with constant folding. + let typ = Type::Variable(id.clone()); + Ok(Expr::Addr(Box::new(Expr::Variable(typ, id)))) + }, + _ => { + let abi = &ctx.abi; + let kind = resolve_extension_macro(&id, abi)?; + + Ok(Expr::ExtensionMacro(Arc::new( + ExtensionMacroDesc { + name: id, + kind, + } + ))) + }, + } + ), + Self::constant(), + )) + ) + } + + rule expr() -> Expr { + lexeme( + context("expression", + separated_list1( + lexeme(char(',')), + Self::assignment_expr(), + ).map(|mut exprs| { + if exprs.len() == 1 { + exprs.remove(0) + } else { + Expr::CommaExpr(exprs) + } + }) + ), + ) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + cinterp::{CompileEnv, CompileError}, + closure::closure, + cparser, + grammar::PackratGrammar, + header::{Abi, Endianness, Signedness}, + parser::tests::test_parser, + }; + + #[test] + fn expr_test() { + fn test(src: &[u8], expected: Expr) { + let abi = Abi { + long_size: LongSize::Bits64, + endianness: Endianness::Little, + char_signedness: Signedness::Unsigned, + }; + let parser = CGrammar::expr(); + let ctx = CGrammarCtx::new(&abi); + let src = CGrammar::make_span(src, &ctx); + test_parser(expected, src, parser); + } + + use Expr::*; + + let rec_var = Addr(Box::new(Variable( + Type::Variable("REC".into()), + "REC".into(), + ))); + + let extension_compiler = Arc::new(closure!( + ( + for<'ceref, 'ce, 'a> Fn( + &'ceref (dyn CompileEnv<'ce> + 'ceref), + &'a Abi, + ) -> Result, CompileError> + ) + Send + Sync, + |_, _| panic!("non-implemented compiler") + )); + + // Decimal literal + test(b"1", IntConstant(Type::I32, 1)); + test(b"42", IntConstant(Type::I32, 42)); + test(b" 1 ", IntConstant(Type::I32, 1)); + test(b" 42 ", IntConstant(Type::I32, 42)); + + test( + b"1125899906842624", + IntConstant(Type::I64, 1125899906842624), + ); + test( + 
b"18446744073709551615u", + IntConstant(Type::U64, 18446744073709551615), + ); + + // Octal literal + test(b"01", IntConstant(Type::I32, 1)); + test(b"0777", IntConstant(Type::I32, 511)); + test(b"01234", IntConstant(Type::I32, 668)); + + // Hexadecimal literal + test(b"0x1", IntConstant(Type::I32, 1)); + test(b"0x1234", IntConstant(Type::I32, 4660)); + test(b"0x777", IntConstant(Type::I32, 1911)); + test(b"0X1", IntConstant(Type::I32, 1)); + test(b"0X1234", IntConstant(Type::I32, 4660)); + test(b"0X777", IntConstant(Type::I32, 1911)); + + // Char constant + test(b"'a'", CharConstant(Type::I32, 'a'.into())); + test(br#"'\n'"#, CharConstant(Type::I32, '\n'.into())); + test(br#"'\xff'"#, CharConstant(Type::I32, 0xff)); + test(br#"'\012'"#, CharConstant(Type::I32, 0o12)); + test(br#"'\0'"#, CharConstant(Type::I32, 0)); + + // String literal + test(br#""a""#, StringLiteral("a".into())); + test(br#"" hello world ""#, StringLiteral(" hello world ".into())); + test( + br#""1 hello world ""#, + StringLiteral("1 hello world ".into()), + ); + test( + br#""hello \n world""#, + StringLiteral("hello \n world".into()), + ); + test(br#""\n\t\\""#, StringLiteral("\n\t\\".into())); + + // Address of + test(b" &1 ", Addr(Box::new(IntConstant(Type::I32, 1)))); + + // Deref + test( + b" *&1 ", + Deref(Box::new(Addr(Box::new(IntConstant(Type::I32, 1))))), + ); + + // Unary + test(b"1", IntConstant(Type::I32, 1)); + test(b" 1 ", IntConstant(Type::I32, 1)); + test(b"+1", Plus(Box::new(IntConstant(Type::I32, 1)))); + test(b" + 1 ", Plus(Box::new(IntConstant(Type::I32, 1)))); + test(b"-1", Minus(Box::new(IntConstant(Type::I32, 1)))); + test(b" - 1 ", Minus(Box::new(IntConstant(Type::I32, 1)))); + test(b" ~ 1 ", Tilde(Box::new(IntConstant(Type::I32, 1)))); + test(b"!1 ", Bang(Box::new(IntConstant(Type::I32, 1)))); + test(b" ! 
1 ", Bang(Box::new(IntConstant(Type::I32, 1)))); + + // Cast + test( + b"(int)1 ", + Cast(Type::I32, Box::new(IntConstant(Type::I32, 1))), + ); + test( + b"(type)1 ", + Cast( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + Box::new(IntConstant(Type::I32, 1)), + ), + ); + test( + b"(type)(1) ", + Cast( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + Box::new(IntConstant(Type::I32, 1)), + ), + ); + test( + b"-(int)1 ", + Minus(Box::new(Cast( + Type::I32, + Box::new(IntConstant(Type::I32, 1)), + ))), + ); + test( + b"-(int)(unsigned long)1 ", + Minus(Box::new(Cast( + Type::I32, + Box::new(Cast(Type::U64, Box::new(IntConstant(Type::I32, 1)))), + ))), + ); + test( + b"(__typeof__(42))1 ", + Cast(Type::I32, Box::new(IntConstant(Type::I32, 1))), + ); + + test( + b"(__typeof__(42ULL))1 ", + Cast(Type::U64, Box::new(IntConstant(Type::I32, 1))), + ); + + // Sizeof type + test(b"sizeof(unsigned long)", SizeofType(Type::U64)); + test( + b"sizeof (s32)", + SizeofType(Type::Typedef(Box::new(Type::I32), "s32".into())), + ); + test(b"sizeof (__typeof__(1))", SizeofType(Type::I32)); + test(b"sizeof (__typeof__(1ULL))", SizeofType(Type::U64)); + test( + b"sizeof(struct page)", + SizeofType(Type::Struct("page".into())), + ); + + // Sizeof expr + test( + b"sizeof(1)", + SizeofExpr(Box::new(IntConstant(Type::I32, 1))), + ); + + test( + b"sizeof(-(int)1)", + SizeofExpr(Box::new(Minus(Box::new(Cast( + Type::I32, + Box::new(IntConstant(Type::I32, 1)), + ))))), + ); + test( + b"sizeof - (int ) 1 ", + SizeofExpr(Box::new(Minus(Box::new(Cast( + Type::I32, + Box::new(IntConstant(Type::I32, 1)), + ))))), + ); + + // Pre-increment + test(b"++ 42 ", PreInc(Box::new(IntConstant(Type::I32, 42)))); + test( + b"++ sizeof - (int ) 1 ", + PreInc(Box::new(SizeofExpr(Box::new(Minus(Box::new(Cast( + Type::I32, + Box::new(IntConstant(Type::I32, 1)), + ))))))), + ); + + // Pre-decrement + test( + b"-- -42 ", + PreDec(Box::new(Minus(Box::new(IntConstant(Type::I32, 42))))), + ); + + // 
Addition + test( + b"1+2", + Add( + Box::new(IntConstant(Type::I32, 1)), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + test( + b" 1 + 2 ", + Add( + Box::new(IntConstant(Type::I32, 1)), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + test( + b" (1) + (2) ", + Add( + Box::new(IntConstant(Type::I32, 1)), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + + // Operator precedence + test( + b" 1 + 2 * 3", + Add( + Box::new(IntConstant(Type::I32, 1)), + Box::new(Mul( + Box::new(IntConstant(Type::I32, 2)), + Box::new(IntConstant(Type::I32, 3)), + )), + ), + ); + + test( + b" 1 * 2 + 3", + Add( + Box::new(Mul( + Box::new(IntConstant(Type::I32, 1)), + Box::new(IntConstant(Type::I32, 2)), + )), + Box::new(IntConstant(Type::I32, 3)), + ), + ); + + test( + b" 1 * 2 + 3 << 4", + LShift( + Box::new(Add( + Box::new(Mul( + Box::new(IntConstant(Type::I32, 1)), + Box::new(IntConstant(Type::I32, 2)), + )), + Box::new(IntConstant(Type::I32, 3)), + )), + Box::new(IntConstant(Type::I32, 4)), + ), + ); + + test( + b" 1 * 2 + 3 << 4 | 5", + BitOr( + Box::new(LShift( + Box::new(Add( + Box::new(Mul( + Box::new(IntConstant(Type::I32, 1)), + Box::new(IntConstant(Type::I32, 2)), + )), + Box::new(IntConstant(Type::I32, 3)), + )), + Box::new(IntConstant(Type::I32, 4)), + )), + Box::new(IntConstant(Type::I32, 5)), + ), + ); + + // Function call + test( + b"f(1)", + FuncCall( + Box::new(EnumConstant(Type::I32, "f".into())), + vec![IntConstant(Type::I32, 1)], + ), + ); + test( + b" f (1, 2, 3) ", + FuncCall( + Box::new(EnumConstant(Type::I32, "f".into())), + vec![ + IntConstant(Type::I32, 1), + IntConstant(Type::I32, 2), + IntConstant(Type::I32, 3), + ], + ), + ); + test( + // This could be either a cast or a function call. 
+ b" (type)(1, 2, 3)", + Cast( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + Box::new(CommaExpr(vec![ + IntConstant(Type::I32, 1), + IntConstant(Type::I32, 2), + IntConstant(Type::I32, 3), + ])), + ), + ); + + // Subscript + test( + b"REC[1]", + Subscript( + Box::new(rec_var.clone()), + Box::new(IntConstant(Type::I32, 1)), + ), + ); + test( + b"REC[1][2]", + Subscript( + Box::new(Subscript( + Box::new(rec_var.clone()), + Box::new(IntConstant(Type::I32, 1)), + )), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + + // Member access + test( + b"REC.y", + MemberAccess(Box::new(rec_var.clone()), "y".into()), + ); + test( + b" REC . y ", + MemberAccess(Box::new(rec_var.clone()), "y".into()), + ); + test( + b"REC.y.z", + MemberAccess( + Box::new(MemberAccess(Box::new(rec_var.clone()), "y".into())), + "z".into(), + ), + ); + test( + b"REC->y", + MemberAccess(Box::new(Deref(Box::new(rec_var.clone()))), "y".into()), + ); + + test( + b"REC->y->z", + MemberAccess( + Box::new(Deref(Box::new(MemberAccess( + Box::new(Deref(Box::new(rec_var.clone()))), + "y".into(), + )))), + "z".into(), + ), + ); + + // Compound literal + test( + b"(type){0}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![IntConstant(Type::I32, 0)], + ), + ); + test( + b"(type){0, 1}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![IntConstant(Type::I32, 0), IntConstant(Type::I32, 1)], + ), + ); + test( + b"(type){.x = 0}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "x".into())), + Box::new(IntConstant(Type::I32, 0)), + )], + ), + ); + test( + b"(type){.x = 0, }", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "x".into())), + Box::new(IntConstant(Type::I32, 0)), + )], + ), + ); + test( + b"(type){.x = {0, 1}}", + 
CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "x".into())), + Box::new(InitializerList(vec![ + IntConstant(Type::I32, 0), + IntConstant(Type::I32, 1), + ])), + )], + ), + ); + test( + b"(type){.x = (type2){0}}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "x".into())), + Box::new(CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type2".into()), + vec![IntConstant(Type::I32, 0)], + )), + )], + ), + ); + test( + b"(type){.x = {(type2){0}, (type3){1, 2}}, .y={3}, .z=4}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![ + DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "x".into())), + Box::new(InitializerList(vec![ + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type2".into()), + vec![IntConstant(Type::I32, 0)], + ), + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type3".into()), + vec![IntConstant(Type::I32, 1), IntConstant(Type::I32, 2)], + ), + ])), + ), + DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "y".into())), + Box::new(InitializerList(vec![IntConstant(Type::I32, 3)])), + ), + DesignatedInitializer( + Box::new(MemberAccess(Box::new(Uninit), "z".into())), + Box::new(IntConstant(Type::I32, 4)), + ), + ], + ), + ); + test( + b"(type){.x[0] = 1}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![DesignatedInitializer( + Box::new(Subscript( + Box::new(MemberAccess(Box::new(Uninit), "x".into())), + Box::new(IntConstant(Type::I32, 0)), + )), + Box::new(IntConstant(Type::I32, 1)), + )], + ), + ); + test( + b"(type){.x[0].y = 1}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![DesignatedInitializer( + Box::new(MemberAccess( + Box::new(Subscript( + Box::new(MemberAccess(Box::new(Uninit), 
"x".into())), + Box::new(IntConstant(Type::I32, 0)), + )), + "y".into(), + )), + Box::new(IntConstant(Type::I32, 1)), + )], + ), + ); + test( + b"(type){[0]= 1, 2}", + CompoundLiteral( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + vec![ + DesignatedInitializer( + Box::new(Subscript( + Box::new(Uninit), + Box::new(IntConstant(Type::I32, 0)), + )), + Box::new(IntConstant(Type::I32, 1)), + ), + IntConstant(Type::I32, 2), + ], + ), + ); + + // Comma operator + test( + b"(1,2)", + CommaExpr(vec![IntConstant(Type::I32, 1), IntConstant(Type::I32, 2)]), + ); + + // Ambiguous cases + + // Amibiguity of is lifted by 6.4p4 stating that the tokenizer is + // greedy, i.e. the following is tokenized as "1 ++ + 2": + // https://port70.net/~nsz/c/c11/n1570.html#6.4p4 + test( + b" 1 +++ 2 ", + Add( + Box::new(PostInc(Box::new(IntConstant(Type::I32, 1)))), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + test( + b" 1 +++++ 2 ", + Add( + Box::new(PostInc(Box::new(PostInc(Box::new(IntConstant( + Type::I32, + 1, + )))))), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + + test( + b" 1 --- 2 ", + Sub( + Box::new(PostDec(Box::new(IntConstant(Type::I32, 1)))), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + test( + b" 1 ----- 2 ", + Sub( + Box::new(PostDec(Box::new(PostDec(Box::new(IntConstant( + Type::I32, + 1, + )))))), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + + // This is genuinely ambiguous: it can be either a cast to type "type" + // of "+2" or the addition of a "type" variable and 2. + // We parse it as a cast as the expressions we are interested in only + // contain one variable (REC). + test( + b" (type) + (2) ", + Cast( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + Box::new(Plus(Box::new(IntConstant(Type::I32, 2)))), + ), + ); + + // Another ambiguous case: could be a function call or a cast. We decide + // to treat that as a cast, since you can make a call without the extra + // paren. 
+ test( + b" (type)(2) ", + Cast( + Type::Typedef(Box::new(Type::Unknown), "type".into()), + Box::new(IntConstant(Type::I32, 2)), + ), + ); + + // More complex cases + test( + br#"(REC->prev_state & ((((0x0000 | 0x0001 | 0x0002) + 1) << 1) - 1)) ? __print_flags(REC->prev_state & ((((0x0000 | 0x0001 | 0x0002) + 1) << 1) - 1), "|", { 0x0001, "S" }, { 0x0002, "D" }) : "R", REC->prev_state & (((0x0000 | 0x0001 | 0x0002) + 1) << 1) ? "+" : """#, + CommaExpr(vec![ + Ternary( + Box::new(BitAnd( + Box::new(MemberAccess( + Box::new(Deref(Box::new( + Addr(Box::new( + Variable(Type::Variable("REC".into()), "REC".into()) + )) + ))), + "prev_state".into() + )), + Box::new(Sub( + Box::new(LShift( + Box::new(Add( + Box::new(BitOr( + Box::new(BitOr( + Box::new(IntConstant(Type::I32, 0)), + Box::new(IntConstant(Type::I32, 1)) + )), + Box::new(IntConstant(Type::I32, 2)) + )), + Box::new(IntConstant(Type::I32, 1)) + )), + Box::new(IntConstant(Type::I32, 1)) + )), + Box::new(IntConstant(Type::I32, 1)) + )) + )), + Box::new(ExtensionMacroCall(cparser::ExtensionMacroCall { + args: br#"REC->prev_state & ((((0x0000 | 0x0001 | 0x0002) + 1) << 1) - 1), "|", { 0x0001, "S" }, { 0x0002, "D" }"#.to_vec(), + desc: Arc::new( + ExtensionMacroDesc { + name: "__print_flags".into(), + kind: ExtensionMacroKind::FunctionLike { + parser: Box::new(|_| panic!("non implemented parser")) + }, + } + ), + compiler: ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::Pointer(Box::new(Type::U8))), + compiler: extension_compiler.clone() + } + })), + Box::new(StringLiteral("R".into())) + ), + Ternary( + Box::new(BitAnd( + Box::new(MemberAccess( + Box::new(Deref( + Box::new(Addr( + Box::new(Variable( + Type::Variable("REC".into()), + "REC".into() + )) + )), + )), + "prev_state".into() + )), + Box::new(LShift( + Box::new(Add( + Box::new(BitOr( + Box::new(BitOr( + Box::new(IntConstant(Type::I32, 0)), + Box::new(IntConstant(Type::I32, 1)) + )), + Box::new(IntConstant(Type::I32, 2)) + )), + 
Box::new(IntConstant(Type::I32, 1)) + )), + Box::new(IntConstant(Type::I32, 1)) + )) + )), + Box::new(StringLiteral("+".into())), + Box::new(StringLiteral("".into())) + ) + ]) + ); + } + + #[test] + fn declaration_test() { + fn test<'a>(decl: &'a [u8], id: &'a str, typ: Type) { + let expected = Declaration { + identifier: id.into(), + typ, + }; + let abi = Abi { + long_size: LongSize::Bits64, + endianness: Endianness::Little, + char_signedness: Signedness::Unsigned, + }; + let parser = CGrammar::declaration(); + let ctx = CGrammarCtx::new(&abi); + let decl = CGrammar::make_span(decl, &ctx); + test_parser(expected, decl, parser); + } + + let u64_typ = Type::Typedef(Box::new(Type::U64), "u64".into()); + let s64_typ = Type::Typedef(Box::new(Type::I64), "s64".into()); + + // Basic + test(b"u64 foo", "foo", u64_typ.clone()); + test(b"u64 static_foo", "static_foo", u64_typ.clone()); + test(b"u64 static foo", "foo", u64_typ.clone()); + + test(b" u64 \t foo\t", "foo", u64_typ.clone()); + test(b" const volatile u64 foo", "foo", u64_typ.clone()); + test(b" u64 const volatile foo", "foo", u64_typ.clone()); + test( + b" const\t volatile _Atomic u64 \t foo\t", + "foo", + u64_typ.clone(), + ); + test(b"int interval", "interval", Type::I32); + + // Structs + Enum + Union + test( + b"struct mystruct foo", + "foo", + Type::Struct("mystruct".into()), + ); + test( + b"struct structmy foo", + "foo", + Type::Struct("structmy".into()), + ); + test( + b"enum mystruct foo", + "foo", + Type::Enum(Box::new(Type::Unknown), "mystruct".into()), + ); + test(b"union mystruct foo", "foo", Type::Union("mystruct".into())); + + // Signed/Unsigned + test(b"int signed extern const foo ", "foo", Type::I32); + test(b"signed extern const foo ", "foo", Type::I32); + test( + b"_Atomic\t unsigned extern const foo ", + "foo", + Type::U32, + ); + test(b"int unsigned extern const foo ", "foo", Type::U32); + test( + b" long \t long unsigned extern const foo ", + "foo", + Type::U64, + ); + + test( + b"int long 
long unsigned extern const foo ", + "foo", + Type::U64, + ); + + test( + b"long extern int long unsigned const foo ", + "foo", + Type::U64, + ); + + // Pointers + test(b"u64 *foo", "foo", Type::Pointer(Box::new(u64_typ.clone()))); + test( + b" u64 * \tfoo ", + "foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + test(b"u64 *foo", "foo", Type::Pointer(Box::new(u64_typ.clone()))); + test( + b" u64 * \tfoo ", + "foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + test( + b"u64 * const foo", + "foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + test( + b" u64 * \tconst\tfoo ", + "foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + test( + b" long unsigned long * \tconst\tfoo ", + "foo", + Type::Pointer(Box::new(Type::U64)), + ); + test( + b" const volatile u64 * const foo", + "foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + test( + b" const\tvolatile u64 * \tconst\tfoo ", + "foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + test( + b" const\tvolatile u64 * const * \tconst\tfoo ", + "foo", + Type::Pointer(Box::new(Type::Pointer(Box::new(u64_typ.clone())))), + ); + test( + b" const\tvolatile u64 _Atomic * const * \tconst\tfoo ", + "foo", + Type::Pointer(Box::new(Type::Pointer(Box::new(u64_typ.clone())))), + ); + test( + b"struct callback_head * rhp", + "rhp", + Type::Pointer(Box::new(Type::Struct("callback_head".into()))), + ); + + test( + b"u64 *static_foo", + "static_foo", + Type::Pointer(Box::new(u64_typ.clone())), + ); + + // Arrays + test( + b" u64 foo\t []", + "foo", + Type::Array(Box::new(u64_typ.clone()), ArrayKind::ZeroLength), + ); + test( + b" u64 foo\t []\t\t", + "foo", + Type::Array(Box::new(u64_typ.clone()), ArrayKind::ZeroLength), + ); + test( + b" u64 foo\t [124]", + "foo", + Type::Array(Box::new(u64_typ.clone()), ArrayKind::Fixed(Ok(124))), + ); + test( + b" u64 foo\t [static 124]", + "foo", + Type::Array(Box::new(u64_typ.clone()), ArrayKind::Fixed(Ok(124))), + ); + test( + b"u64 foo [static_bar]", + "foo", + 
Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Err(Box::new(InterpError::CompileError(Box::new( + CompileError::ExprNotHandled(Expr::EnumConstant( + Type::I32, + "static_bar".into(), + )), + ))))), + ), + ); + test( + b" u64 (*foo) [1]", + "foo", + Type::Pointer(Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(1)), + ))), + ); + test( + b" u64 ((*foo)) [1]", + "foo", + Type::Pointer(Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(1)), + ))), + ); + test( + b" u64 (*foo[]) [1]", + "foo", + Type::Array( + Box::new(Type::Pointer(Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(1)), + )))), + ArrayKind::ZeroLength, + ), + ); + test( + b" u64(*foo[]\t)[1]", + "foo", + Type::Array( + Box::new(Type::Pointer(Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(1)), + )))), + ArrayKind::ZeroLength, + ), + ); + test( + b" u64 foo\t [A+B]\t\t", + "foo", + Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Err(Box::new(InterpError::CompileError(Box::new( + CompileError::ExprNotHandled(Expr::EnumConstant(Type::I32, "A".into())), + ))))), + ), + ); + + // Nested arrays + test( + b" u64 foo\t [][]", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::ZeroLength, + )), + ArrayKind::ZeroLength, + ), + ); + test( + b" u64 foo\t [1][2][3]", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(3)), + )), + ArrayKind::Fixed(Ok(2)), + )), + ArrayKind::Fixed(Ok(1)), + ), + ); + test( + b" u64 (*foo[3]) [2][1] ", + "foo", + Type::Array( + Box::new(Type::Pointer(Box::new(Type::Array( + Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(1)), + )), + ArrayKind::Fixed(Ok(2)), + )))), + ArrayKind::Fixed(Ok(3)), + ), + ); + + // Function pointers + test( + b"int(*f)()", + "f", + Type::Pointer(Box::new(Type::Function(Box::new(Type::I32), Vec::new()))), 
+ ); + test( + b" int ( * f ) ( ) ", + "f", + Type::Pointer(Box::new(Type::Function(Box::new(Type::I32), Vec::new()))), + ); + test( + b"int(*f)(unsigned int, s64 param1)", + "f", + Type::Pointer(Box::new(Type::Function( + Box::new(Type::I32), + vec![ + ParamDeclaration { + identifier: None, + typ: Type::U32, + }, + ParamDeclaration { + identifier: Some("param1".into()), + typ: s64_typ.clone(), + }, + ], + ))), + ); + test( + b"foobar(*f)(unsigned int, s64 param1, pid_t(*param2)(u64))[1]", + "f", + Type::Pointer(Box::new(Type::Function( + Box::new(Type::Array( + Box::new(Type::Typedef(Box::new(Type::Unknown), "foobar".into())), + ArrayKind::Fixed(Ok(1)), + )), + vec![ + ParamDeclaration { + identifier: None, + typ: Type::U32, + }, + ParamDeclaration { + identifier: Some("param1".into()), + typ: s64_typ.clone(), + }, + ParamDeclaration { + identifier: Some("param2".into()), + typ: Type::Pointer(Box::new(Type::Function( + Box::new(Type::Typedef(Box::new(Type::I32), "pid_t".into())), + vec![ParamDeclaration { + identifier: None, + typ: u64_typ.clone(), + }], + ))), + }, + ], + ))), + ); + test( + b"foobar(* const arr[2])(unsigned int, s64 param1, pid_t(*param2)(u64))[1]", + "arr", + Type::Array( + Box::new(Type::Pointer(Box::new(Type::Function( + Box::new(Type::Array( + Box::new(Type::Typedef(Box::new(Type::Unknown), "foobar".into())), + ArrayKind::Fixed(Ok(1)), + )), + vec![ + ParamDeclaration { + identifier: None, + typ: Type::U32, + }, + ParamDeclaration { + identifier: Some("param1".into()), + typ: s64_typ.clone(), + }, + ParamDeclaration { + identifier: Some("param2".into()), + typ: Type::Pointer(Box::new(Type::Function( + Box::new(Type::Typedef(Box::new(Type::I32), "pid_t".into())), + vec![ParamDeclaration { + identifier: None, + typ: u64_typ.clone(), + }], + ))), + }, + ], + )))), + ArrayKind::Fixed(Ok(2)), + ), + ); + + test( + b"short (*(*foo)(int))(void)", + "foo", + Type::Pointer(Box::new(Type::Function( + Box::new(Type::Pointer(Box::new(Type::Function( + 
Box::new(Type::I16), + vec![ParamDeclaration { + identifier: None, + typ: Type::Void, + }], + )))), + vec![ParamDeclaration { + identifier: None, + typ: Type::I32, + }], + ))), + ); + + // Scalar __data_loc + test( + b"__data_loc u64 foo", + "foo", + Type::DynamicScalar(Box::new(u64_typ.clone()), DynamicKind::Dynamic), + ); + + test( + b"__data_loc cpumask_t mask", + "mask", + Type::DynamicScalar( + Box::new(Type::Typedef(Box::new(Type::Unknown), "cpumask_t".into())), + DynamicKind::Dynamic, + ), + ); + + test( + b"__data_loc u64* foo", + "foo", + Type::DynamicScalar( + Box::new(Type::Pointer(Box::new(u64_typ.clone()))), + DynamicKind::Dynamic, + ), + ); + + test( + b"__data_loc unsigned volatile * const foo", + "foo", + Type::DynamicScalar( + Box::new(Type::Pointer(Box::new(Type::U32))), + DynamicKind::Dynamic, + ), + ); + + test( + b"__data_loc u64 (*)[3] foo", + "foo", + Type::DynamicScalar( + Box::new(Type::Pointer(Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(3)), + )))), + DynamicKind::Dynamic, + ), + ); + + test( + b"__data_loc const u64 _Atomic ( * volatile)[3] foo", + "foo", + Type::DynamicScalar( + Box::new(Type::Pointer(Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(3)), + )))), + DynamicKind::Dynamic, + ), + ); + + // Array __data_loc and __rel_loc + test( + b"__rel_loc u64[] foo", + "foo", + Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Dynamic(DynamicKind::DynamicRel), + ), + ); + + test( + b"__data_loc u64[] foo", + "foo", + Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Dynamic(DynamicKind::Dynamic), + ), + ); + + test( + b" __data_loc\t u64 []foo", + "foo", + Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Dynamic(DynamicKind::Dynamic), + ), + ); + test( + b" __data_loc\t u64 [42][]foo", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(42)), + )), + ArrayKind::Dynamic(DynamicKind::Dynamic), + ), + ); + + test( + b" 
__data_loc\t u64 [42][43][]foo", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(43)), + )), + ArrayKind::Fixed(Ok(42)), + )), + ArrayKind::Dynamic(DynamicKind::Dynamic), + ), + ); + + test( + b" __data_loc u64 (*[3]) [2][]foo", + "foo", + Type::Array( + Box::new(Type::Pointer(Box::new(Type::Array( + Box::new(Type::Array( + Box::new(u64_typ.clone()), + ArrayKind::Fixed(Ok(2)), + )), + ArrayKind::Fixed(Ok(3)), + )))), + ArrayKind::Dynamic(DynamicKind::Dynamic), + ), + ); + + // All together + test( + b" const\tvolatile unsigned int volatile*const _Atomic* \tconst\tfoo [sizeof(struct foo)] \t[42] ", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(Type::Pointer(Box::new(Type::Pointer(Box::new( + Type::U32, + ))))), + ArrayKind::Fixed(Ok(42)), + )), + ArrayKind::Fixed(Err(Box::new(InterpError::CompileError(Box::new(CompileError::UnknownSize(Type::Struct("foo".into()))))))), + ), + ); + test( + b" const\tvolatile int volatile signed*const _Atomic* \tconst\tfoo [sizeof(struct foo)] \t[42] ", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(Type::Pointer(Box::new(Type::Pointer(Box::new( + Type::I32, + ))))), + ArrayKind::Fixed(Ok(42)), + )), + ArrayKind::Fixed(Err(Box::new(InterpError::CompileError(Box::new(CompileError::UnknownSize(Type::Struct("foo".into()))))))), + ), + ); + + test( + b" __data_loc\tconst\tvolatile int volatile signed*const _Atomic* \tconst \t[] foo", + "foo", + Type::Array( + Box::new(Type::Pointer(Box::new(Type::Pointer(Box::new(Type::I32))))), + ArrayKind::Dynamic(DynamicKind::Dynamic), + ), + ); + + test( + b" __data_loc \tconst\tvolatile int volatile signed*const _Atomic* \tconst\t [sizeof(struct foo)] \t[42] []foo", + "foo", + Type::Array( + Box::new(Type::Array( + Box::new(Type::Array( + Box::new(Type::Pointer(Box::new(Type::Pointer(Box::new( + Type::I32, + ))))), + ArrayKind::Fixed(Ok(42)), + )), + 
ArrayKind::Fixed(Err(Box::new(InterpError::CompileError(Box::new(CompileError::UnknownSize(Type::Struct("foo".into()))))))), + ), + ), ArrayKind::Dynamic(DynamicKind::Dynamic))); + } +} diff --git a/tools/trace-parser/traceevent/src/error.rs b/tools/trace-parser/traceevent/src/error.rs new file mode 100644 index 0000000000..c3c4f6495c --- /dev/null +++ b/tools/trace-parser/traceevent/src/error.rs @@ -0,0 +1,10 @@ +macro_rules! convert_err_impl { + ($src:path, $variant:ident, $dst:ident) => { + impl From<$src> for $dst { + fn from(err: $src) -> Self { + $dst::$variant(Box::new(err.into())) + } + } + }; +} +pub(crate) use convert_err_impl; diff --git a/tools/trace-parser/traceevent/src/grammar.rs b/tools/trace-parser/traceevent/src/grammar.rs new file mode 100644 index 0000000000..8e292d34ff --- /dev/null +++ b/tools/trace-parser/traceevent/src/grammar.rs @@ -0,0 +1,615 @@ +use core::cell::RefCell; +use std::rc::Rc; + +use nom::{Finish as _, Parser}; +use nom_locate::LocatedSpan; + +use crate::{ + parser::{FromParseError, NomError}, + scratch::{OwnedScratchBox, ScratchAlloc}, +}; + +pub type Span<'i, G> = LocatedSpan< + &'i [u8], + ( + &'i ::Ctx<'i>, + Rc::State<'i>>>>, + Rc, + ), +>; + +#[derive(Clone)] +pub struct LocatedState { + pub state: State, + pub pos: usize, +} + +pub trait PackratGrammar { + type Ctx<'i>: 'i; + type State<'i>; + type Error; + + #[inline] + fn get_ctx<'iref, 'i: 'iref>(input: &'iref Span<'i, Self>) -> &'i Self::Ctx<'i> { + input.extra.0 + } + + #[allow(clippy::type_complexity)] + #[inline] + fn grammar_ctx<'i, E>( + ) -> fn(Span<'i, Self>) -> nom::IResult, &'i Self::Ctx<'i>, E> { + move |input: Span<'i, Self>| { + let ctx = Self::get_ctx(&input); + Ok((input, ctx)) + } + } + + fn make_span<'i>(input: &'i [u8], ctx: &'i Self::Ctx<'i>) -> Span<'i, Self> + where + Self::State<'i>: Default + Clone, + Self: Sized, + { + // Use len + 1 so that we can store a state for empty strings as well + // (and for the last rule of the parse when we consumed 
the full input) + let len = input.len() + 1; + let vec = vec![Default::default(); len]; + let ctx = ( + ctx, + Rc::new(RefCell::new(vec)), + Rc::new(ScratchAlloc::new()), + ); + LocatedSpan::new_extra(input, ctx) + } + + #[inline] + fn apply_rule<'i, 'p, O, E, P>( + mut rule: P, + input: &'i [u8], + ctx: &'i Self::Ctx<'i>, + ) -> Result<(&'i [u8], O), E> + where + E: FromParseError<&'i [u8], nom::error::VerboseError<&'i [u8]>>, + P: 'p + Parser, O, NomError>>>, + ::State<'i>: Default + Clone, + Self: Sized, + for<'a> Self::Ctx<'a>: 'a, + { + let span = Self::make_span(input, ctx); + match rule.parse(span).finish() { + Ok((remaining, x)) => Ok((*remaining.fragment(), x)), + Err(err) => match err.data { + None => { + let inner = nom::error::VerboseError { + errors: err + .inner + .errors + .into_iter() + .map(|(span, kind)| (*span.fragment(), kind)) + .collect(), + }; + Err(E::from_parse_error(input, &inner)) + } + Some(err) => Err(err), + }, + } + } +} + +#[derive(Default, Clone)] +pub enum PackratAction<'a, T> { + // Keep that variant as the first, so that its discriminant is (probably) 0 + // so efficient zeroing can be used when initializing large amounts of + // memory. + #[default] + Seed, + + // The size of this enum is critical for performance, as there will be one + // value allocated in advance per input position and per left recursive + // rule. This can lead to having to initialize large amounts of memory, so + // it needs to be as small as possible. Therefore we put the data behind a + // dynamic allocation. + Succeed(OwnedScratchBox<'a, LocatedState, Rc>), + Fail, +} + +macro_rules! __if_set_else { + ({$($true:tt)*} {$($false:tt)*}) => { + $($false)* + }; + ({$($true:tt)*} {$($false:tt)*} $($_:tt)+) => { + $($true)* + }; +} + +pub(crate) use __if_set_else; + +// Allow defining grammar production rules with most of the boilerplate +// removed and automatic context() added +macro_rules! 
grammar { + ( + name: $vis:vis $grammar_name:ident, + ctx: $ctx:ty, + error: $grammar_error:ty, + rules: { + $( $(#[$rec:meta])? rule $name:ident $(<$($generics:tt $(: $bound:tt)?),*>)? ($($param:ident: $param_ty:ty),*) -> $ret:ty $body:block)* + }) => { + $vis struct $grammar_name (); + + // Create the state struct in a fresh scope so it will not + // conflict with any other state structs. It also allows using "use" + // without polluting the surrounding scope. + const _: () = { + use $crate::grammar::{PackratGrammar, PackratAction, Span}; + use ::nom::error::context; + + #[allow(non_camel_case_types)] + #[derive(Default, Clone)] + pub struct PackratState<'i> { + $( + #[allow(dead_code)] + $name: $crate::grammar::__if_set_else! { + { + PackratAction<'i, $ret> + } + {()} + $($rec)? + }, + )* + __internal_phantom_lifetime: std::marker::PhantomData<&'i ()>, + } + + impl PackratGrammar for $grammar_name { + // Using Rc<> allows cloning the LocatedSpan while sharing + // the packrat state. + type State<'i> = PackratState<'i>; + type Error = $grammar_error; + type Ctx<'i> = $ctx; + } + + impl $grammar_name { + $( + $vis fn $name<'i, 'ret, $($($generics $(: $bound)?,)*)? E>($($param: $param_ty),*) -> impl ::nom::Parser, $ret, E> + 'ret + where + E: 'ret + + ::nom::error::ParseError> + + ::nom::error::ContextError> + + ::nom::error::FromExternalError, $grammar_error>, + $($($generics: 'ret),*)? + { + // Wrap the body in a closure to avoid recursive type issues + // when a rule is recursive, and add a context for free. + // + // Also, this allows to implement packrat parsing + // modified to support left recursive grammar. + move |input: Span<'i, $grammar_name>| { + $crate::grammar::__if_set_else! 
{ + { + use std::{ + ops::Deref as _, + }; + + let parser = move |input| $body.parse(input); + let orig_pos = input.location_offset(); + let mut packrat = input.extra.1.deref().borrow_mut(); + let state = &mut packrat[orig_pos].$name; + + match state { + PackratAction::Seed => { + // Will make any recursive invocation of the rule fail + *state = PackratAction::Fail; + drop(packrat); + + // Parse once, with no recursive call allowed to + // succeed. This provides the seed result that + // will be reinjected at the next attempt. + let mut res = context(concat!(stringify!($name), " (seed)"), parser).parse(input.clone())?; + + loop { + let (i, seed) = &res; + let pos = i.location_offset(); + + { + // Re-borrow the RefCell so that it does not + // appear as borrowed when running the rule. + let mut packrat = input.extra.1.deref().borrow_mut(); + let state = &mut packrat[orig_pos].$name; + // Set the seed, which will make any + // recursive call to that rule succeed with + // that result. + *state = PackratAction::Succeed( + $crate::scratch::OwnedScratchBox::new_in( + $crate::grammar::LocatedState { + state: seed.clone(), + pos, + }, + ::std::rc::Rc::clone(&input.extra.2), + ) + ); + } + + // Parse again with the seed in place, so + // that recursive call succeed and we can + // try to match what comes after + let res2 = context(concat!(stringify!($name), " (reparse)"), parser).parse(input.clone())?; + + let (i2, _x2) = &res2; + let pos2 = i2.location_offset(); + + // If we consumed the whole input, we have + // the best match possible. + if i2.fragment().len() == 0 { + return Ok(res2) + } else if pos >= pos2 { + return Ok(res) + // If this resulted in a longer match, take + // it and loop again. Otherwise, we found + // the best match. 
+ } else { + res = res2; + } + } + } + PackratAction::Succeed(data) => { + let val = (&data.state).clone(); + let pos = data.pos; + drop(packrat); + let (input, _) = ::nom::bytes::complete::take(pos - input.location_offset()).parse(input)?; + context(concat!(stringify!($name), " (pre-parsed)"), success(val))(input) + } + PackratAction::Fail => { + drop(packrat); + context(concat!(stringify!($name), " (seed recursion block)"), fail)(input) + } + } + } + {$body.parse(input)} + $($rec)? + } + + } + } + + )* + } + }; + } +} +pub(crate) use grammar; + +// Allow importing the macro like any other item inside this crate + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::parser::{lexeme, parenthesized, tests::test_parser, to_str}; + + #[test] + fn packrat_test() { + fn test(input: &[u8], expected: Ast) { + let parser = TestGrammar::starting_symbol(); + let input = TestGrammar::make_span(input, &()); + test_parser(expected, input, parser); + } + + #[derive(Debug, Clone, PartialEq)] + enum Ast { + Var(std::string::String), + Add(Box, Box), + Sub(Box, Box), + Mul(Box, Box), + Div(Box, Box), + } + + use std::string::ToString; + + use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::alpha1, + combinator::{fail, recognize, success}, + multi::many1, + sequence::separated_pair, + }; + + grammar! 
{ + name: TestGrammar, + ctx: (), + error: (), + rules: { + #[leftrec] rule literal() -> Ast { + lexeme(recognize(many1(alpha1))).map(|id: LocatedSpan<&[u8], _>| Ast::Var(to_str(id.fragment()))) + } + + #[leftrec] rule expr() -> Ast { + lexeme(alt(( + context("add", + separated_pair( + Self::expr(), + tag("+"), + Self::term(), + ).map(|(op1, op2)| Ast::Add(Box::new(op1), Box::new(op2))) + ), + context("sub", + separated_pair( + Self::expr(), + tag("-"), + Self::term(), + ).map(|(op1, op2)| Ast::Sub(Box::new(op1), Box::new(op2))) + ), + Self::term(), + ))) + } + + #[leftrec] rule term() -> Ast { + lexeme(alt(( + context("mul", + separated_pair( + Self::term(), + tag("*"), + Self::factor(), + ).map(|(op1, op2)| Ast::Mul(Box::new(op1), Box::new(op2))) + ), + context("div", + separated_pair( + Self::term(), + tag("/"), + Self::factor(), + ).map(|(op1, op2)| Ast::Div(Box::new(op1), Box::new(op2))) + ), + Self::factor(), + ))) + } + + + #[leftrec] rule factor() -> Ast { + lexeme(alt(( + Self::literal(), + context("paren", + parenthesized(Self::expr()) + ), + ))) + } + + #[leftrec] rule starting_symbol() -> Ast { + Self::expr() + } + } + } + + test(b" a ", Ast::Var("a".to_string())); + test(b"((( a)) ) ", Ast::Var("a".to_string())); + test( + b"a+b", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + ), + ); + test( + b"a + b", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + ), + ); + test( + b"a + b ", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + ), + ); + test( + b" a + b ", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + ), + ); + test( + b" a + b", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + ), + ); + + test( + b" a + b+c ", + Ast::Add( + Box::new(Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + )), + 
Box::new(Ast::Var("c".to_string())), + ), + ); + + test( + b"a+(b+c)", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Add( + Box::new(Ast::Var("b".to_string())), + Box::new(Ast::Var("c".to_string())), + )), + ), + ); + + test( + b"(a+b)+c", + Ast::Add( + Box::new(Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + )), + Box::new(Ast::Var("c".to_string())), + ), + ); + test( + b"(a+b+c)", + Ast::Add( + Box::new(Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + )), + Box::new(Ast::Var("c".to_string())), + ), + ); + test( + b"(a+b*c)", + Ast::Add( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Mul( + Box::new(Ast::Var("b".to_string())), + Box::new(Ast::Var("c".to_string())), + )), + ), + ); + test( + b"a*b+c*d", + Ast::Add( + Box::new(Ast::Mul( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + )), + Box::new(Ast::Mul( + Box::new(Ast::Var("c".to_string())), + Box::new(Ast::Var("d".to_string())), + )), + ), + ); + + test( + b"(a*b)/(c*d)", + Ast::Div( + Box::new(Ast::Mul( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + )), + Box::new(Ast::Mul( + Box::new(Ast::Var("c".to_string())), + Box::new(Ast::Var("d".to_string())), + )), + ), + ); + test( + b"(a*(b/(c*d)))", + Ast::Mul( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Div( + Box::new(Ast::Var("b".to_string())), + Box::new(Ast::Mul( + Box::new(Ast::Var("c".to_string())), + Box::new(Ast::Var("d".to_string())), + )), + )), + ), + ); + + test( + b"a*b/c*d", + Ast::Mul( + Box::new(Ast::Div( + Box::new(Ast::Mul( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Var("b".to_string())), + )), + Box::new(Ast::Var("c".to_string())), + )), + Box::new(Ast::Var("d".to_string())), + ), + ); + + test( + b"a-b/c*d", + Ast::Sub( + Box::new(Ast::Var("a".to_string())), + Box::new(Ast::Mul( + Box::new(Ast::Div( + Box::new(Ast::Var("b".to_string())), + 
Box::new(Ast::Var("c".to_string())), + )), + Box::new(Ast::Var("d".to_string())), + )), + ), + ); + } + + #[test] + fn packrat_recursive_test() { + fn test(input: &[u8], expected: Ast) { + let parser = TestGrammar::starting_symbol(); + let input = TestGrammar::make_span(input, &()); + test_parser(expected, input, parser); + } + + #[derive(Debug, Clone, PartialEq)] + enum Ast { + Constant(char), + CommaExpr(Box, Box), + } + + use nom::{ + branch::alt, + character::complete::char, + combinator::{fail, success}, + sequence::{delimited, separated_pair}, + }; + + grammar! { + name: TestGrammar, + ctx: (), + error: (), + rules: { + #[leftrec] rule primary_expr() -> Ast { + lexeme( + alt(( + alt(( + char('1'), + char('2'), + )).map(Ast::Constant), + delimited( + lexeme(char('(')), + Self::expr(), + lexeme(char(')')), + ) + )) + ) + } + + #[leftrec] rule comma_expr() -> Ast { + lexeme( + alt(( + separated_pair( + Self::primary_expr(), + lexeme(char(',')), + Self::comma_expr(), + ).map(|(x, y)| Ast::CommaExpr(Box::new(x), Box::new(y))), + Self::primary_expr(), + )) + ) + } + + #[leftrec] rule expr() -> Ast { + alt(( + Self::comma_expr(), + Self::primary_expr(), + )) + } + #[leftrec] rule starting_symbol() -> Ast { + Self::expr() + } + } + } + + use Ast::*; + + test(b"1", Constant('1')); + test(b"2", Constant('2')); + test( + b"(1,2)", + CommaExpr(Box::new(Constant('1')), Box::new(Constant('2'))), + ); + } +} diff --git a/tools/trace-parser/traceevent/src/header.rs b/tools/trace-parser/traceevent/src/header.rs new file mode 100644 index 0000000000..b75db9add3 --- /dev/null +++ b/tools/trace-parser/traceevent/src/header.rs @@ -0,0 +1,2461 @@ +use core::{ + borrow::Borrow, + convert::TryFrom, + ffi::CStr, + fmt::{Debug, Display, Formatter}, + hash::{Hash, Hasher}, + ops::Deref as _, + str::from_utf8, +}; +use std::{ + borrow::Cow, + collections::BTreeMap, + io, + io::Error as IoError, + rc::Rc, + string::{String as StdString, ToString}, + sync::{Arc, RwLock}, +}; + +use nom::{ 
+ branch::alt, + bytes::complete::{is_a, is_not, tag}, + character::complete::{ + char, multispace0, multispace1, u16 as txt_u16, u32 as txt_u32, u64 as txt_u64, + }, + combinator::{all_consuming, iterator, map_res, opt, rest}, + error::context, + multi::{fold_many0, many0, separated_list0}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + Finish as _, Parser, +}; +use once_cell::sync::OnceCell; +use smartstring::alias::String; + +use crate::{ + array::Array, + buffer::{Buffer, BufferError, FieldDecoder}, + cinterp::{CompileEnv, CompileError, EvalEnv, EvalError, new_dyn_evaluator, Evaluator, Value}, + closure::closure, + compress::{Decompressor, DynDecompressor, ZlibDecompressor, ZstdDecompressor}, + cparser::{ + identifier, is_identifier, string_literal, ArrayKind, CGrammar, CGrammarCtx, Declaration, + Expr, ExtensionMacroCall, ExtensionMacroCallCompiler, ExtensionMacroCallType, + ExtensionMacroDesc, Type, + }, + grammar::PackratGrammar as _, + io::{BorrowingCursor, BorrowingRead}, + nested_pointer::NestedPointer, + parser::{ + error, failure, hex_u64, lexeme, map_res_cut, to_str, FromParseError, NomError, + NomParserExt as _, VerboseParseError, + }, + print::{parse_print_fmt, PrintAtom, PrintFmtError, PrintFmtStr, PrintSpecifier, StringWriter}, + scratch::{ScratchAlloc, ScratchVec}, + str::Str, + error::convert_err_impl, +}; + +pub type Address = u64; +pub type AddressOffset = Address; +pub type AddressSize = Address; +pub type CPU = u32; +pub type PID = u32; +pub type Timestamp = u64; +pub type TimeOffset = i64; +pub type SymbolName = String; +pub type TaskName = String; +pub type Identifier = String; +pub type EventId = u16; + +pub type FileOffset = u64; +pub type FileSize = FileOffset; + +pub type MemOffset = usize; +pub type MemSize = MemOffset; +pub type MemAlign = MemOffset; + +pub type SectionId = u16; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] +pub enum Endianness { + Big, + Little, +} + +macro_rules! 
parse_N { + ($name:ident, $typ:ty) => { + #[inline] + pub fn $name<'a>(&self, input: &'a [u8]) -> Result<(&'a [u8], $typ), io::Error> { + let arr = input + .get(..std::mem::size_of::<$typ>()) + .ok_or(io::Error::from(io::ErrorKind::UnexpectedEof))?; + let arr: [u8; std::mem::size_of::<$typ>()] = arr.try_into().unwrap(); + let x = match self { + Endianness::Big => <$typ>::from_be_bytes(arr), + Endianness::Little => <$typ>::from_le_bytes(arr), + }; + let input = &input[std::mem::size_of::<$typ>()..]; + Ok((input, x)) + } + }; +} + +impl Endianness { + fn native() -> Self { + if cfg!(target_endian = "big") { + Endianness::Big + } else if cfg!(target_endian = "little") { + Endianness::Little + } else { + panic!("Cannot handle endianness") + } + } + + pub fn is_native(&self) -> bool { + self == &Self::native() + } + + parse_N!(parse_u64, u64); + parse_N!(parse_u32, u32); + parse_N!(parse_u16, u16); + parse_N!(parse_u8, u8); +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LongSize { + Bits32, + Bits64, +} + +impl From for u64 { + fn from(size: LongSize) -> Self { + match size { + LongSize::Bits32 => 4, + LongSize::Bits64 => 8, + } + } +} +impl From for usize { + fn from(size: LongSize) -> Self { + match size { + LongSize::Bits32 => 4, + LongSize::Bits64 => 8, + } + } +} + +impl TryFrom for LongSize { + type Error = usize; + + fn try_from(size: usize) -> Result { + match size { + 4 => Ok(LongSize::Bits32), + 8 => Ok(LongSize::Bits64), + x => Err(x), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Signedness { + Signed, + Unsigned, +} + +impl Display for Signedness { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + let s = match self { + Signedness::Signed => "signed", + Signedness::Unsigned => "unsigned", + }; + f.write_str(s) + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Abi { + pub endianness: Endianness, + pub long_size: LongSize, + pub char_signedness: Signedness, +} + 
+macro_rules! abi_parse_N { + ($name:ident, $typ:ty) => { + #[inline] + pub fn $name<'a>(&self, input: &'a [u8]) -> Result<(&'a [u8], $typ), io::Error> { + self.endianness.$name(input) + } + }; +} + +impl Abi { + abi_parse_N!(parse_u64, u64); + abi_parse_N!(parse_u32, u32); + abi_parse_N!(parse_u16, u16); + abi_parse_N!(parse_u8, u8); + + #[inline] + pub fn char_typ(&self) -> Type { + match self.char_signedness { + Signedness::Unsigned => Type::U8, + Signedness::Signed => Type::I8, + } + } + + #[inline] + pub fn long_typ(&self) -> Type { + match self.long_size { + LongSize::Bits32 => Type::I32, + LongSize::Bits64 => Type::I64, + } + } + + #[inline] + pub fn ulong_typ(&self) -> Type { + match self.long_size { + LongSize::Bits32 => Type::U32, + LongSize::Bits64 => Type::U64, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BufferId { + pub cpu: CPU, + pub name: String, +} + +#[derive(Debug, Clone)] +pub struct BufferLocation { + pub id: BufferId, + pub offset: FileOffset, + pub size: FileSize, +} + +#[derive(Debug, Clone)] +pub(crate) struct HeaderV6 { + pub(crate) kernel_abi: Abi, + pub(crate) page_size: FileSize, + pub(crate) event_descs: Vec, + pub(crate) kallsyms: BTreeMap, + pub(crate) str_table: BTreeMap, + pub(crate) pid_comms: BTreeMap, + pub(crate) options: Vec, + pub(crate) top_level_buffer_locations: Vec, + pub(crate) nr_cpus: CPU, +} + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub(crate) struct HeaderV7 { + pub(crate) kernel_abi: Abi, + pub(crate) page_size: FileSize, + pub(crate) event_descs: Vec, + pub(crate) kallsyms: BTreeMap, + pub(crate) str_table: BTreeMap, + pub(crate) pid_comms: BTreeMap, + pub(crate) options: Vec, + pub(crate) nr_cpus: CPU, +} + +#[derive(Debug, Clone)] +enum VersionedHeader { + V6(HeaderV6), + V7(HeaderV7), +} + +#[derive(Debug, Clone)] +pub struct Header { + // We have this inner layer so the publicly exposed struct is completely + // opaque. An enum cannot be opaque. 
+ inner: VersionedHeader, +} + +macro_rules! attr { + ($header:expr, $attr:ident) => { + match $header { + Header { + inner: + VersionedHeader::V6(HeaderV6 { $attr, .. }) + | VersionedHeader::V7(HeaderV7 { $attr, .. }), + } => $attr, + } + }; +} + +impl Header { + #[inline] + pub fn deref_static(&self, addr: Address) -> Result, EvalError> { + match attr!(self, str_table).get(&addr) { + Some(s) => Ok(Value::Str(Str::new_borrowed(s.deref()))), + None => Err(EvalError::CannotDeref(addr)), + } + } + + #[inline] + pub fn event_descs(&self) -> impl IntoIterator { + attr!(self, event_descs) + } + + #[inline] + pub fn event_desc_by_id(&self, id: EventId) -> Option<&EventDesc> { + self.event_descs() + .into_iter() + .find(move |desc| desc.id == id) + } + + #[inline] + pub fn event_desc_by_name(&self, name: &str) -> Option<&EventDesc> { + self.event_descs() + .into_iter() + .find(move |desc| desc.name == name) + } + + #[inline] + pub fn kernel_abi(&self) -> &Abi { + attr!(self, kernel_abi) + } + + #[inline] + pub fn comm_of(&self, pid: PID) -> Option<&TaskName> { + attr!(self, pid_comms).get(&pid) + } + + pub fn sym_at(&self, addr: Address) -> Option<(AddressOffset, Option, &str)> { + use std::ops::Bound::{Excluded, Included, Unbounded}; + if addr == 0 { + None + } else { + let map = attr!(self, kallsyms); + let next_addr = map + .range((Excluded(addr), Unbounded)) + .next() + .map(|(addr, _)| addr); + map.range((Unbounded, Included(addr))) + .last() + .map(|(base, s)| { + let size = next_addr.map(|next| next - addr); + let offset = addr - base; + (offset, size, s.deref()) + }) + } + } + + #[inline] + pub fn nr_cpus(&self) -> CPU { + *attr!(self, nr_cpus) + } + + #[inline] + pub fn options(&self) -> impl IntoIterator { + attr!(self, options) + } + + #[inline] + pub fn kallsyms(&self) -> impl IntoIterator { + attr!(self, kallsyms).into_iter().map(|(k, v)| { + (*k, v.deref()) + }) + } + + #[inline] + pub fn pid_comms(&self) -> impl IntoIterator { + attr!(self, 
pid_comms).into_iter().map(|(k, v)| { + (*k, v.deref()) + }) + } + + pub(crate) fn timestamp_fixer(&self) -> impl Fn(Timestamp) -> Timestamp { + let mut offset_signed: i64 = 0; + let mut offset_unsigned: u64 = 0; + let mut _multiplier: u32 = 1; + let mut _shift: u32 = 0; + + for opt in self.options() { + match opt { + Options::TimeOffset(offset) => { + offset_signed += *offset; + } + Options::TSC2NSec { + multiplier, + shift, + offset, + } => { + offset_unsigned += *offset; + _multiplier = *multiplier; + _shift = *shift; + } + _ => (), + } + } + + move |ts: Timestamp| { + let ts: u128 = ts.into(); + let ts = (ts * _multiplier as u128) >> _shift; + let ts = ts as u64; + ts.saturating_add_signed(offset_signed) + offset_unsigned + } + } + + pub fn buffers<'i, 'h, 'a: 'i + 'h, I: BorrowingRead + Send + 'i>( + &'a self, + input: I, + ) -> Result>, BufferError> { + match &self.inner { + VersionedHeader::V6(header) => header.buffers(self, input), + VersionedHeader::V7(header) => header.buffers(self, input), + } + } + + #[inline] + fn fixup_event_descs(&mut self) { + let mut header = self.clone(); + // Ensure we won't accidentally lookup an event descriptor with a broken parent link on the + // header clone. This also saves some memory. + *attr!(&mut header, event_descs) = Vec::new(); + + let header: Arc<_> = header.into(); + for event_desc in attr!(self, event_descs) { + event_desc.header = Some(Arc::clone(&header)) + } + } +} + +#[derive(Clone)] +pub struct FieldFmt { + pub declaration: Declaration, + pub offset: MemOffset, + pub size: MemSize, + + pub decoder: Arc, +} + +// This instance is used for testing for now. We compare everything except the +// decoder, which cannot be compared as it is some sort of closure. 
// Structural equality on layout only: the decoder is a closure-like value
// that cannot be compared.
impl PartialEq for FieldFmt {
    fn eq(&self, other: &Self) -> bool {
        self.declaration == other.declaration
            && self.offset == other.offset
            && self.size == other.size
    }
}

impl Debug for FieldFmt {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> {
        // The decoder is not Debug, hence finish_non_exhaustive().
        f.debug_struct("FieldFmt")
            .field("declaration", &self.declaration)
            .field("offset", &self.offset)
            .field("size", &self.size)
            .finish_non_exhaustive()
    }
}

/// Binary layout of an event record: the ordered list of its fields.
#[derive(Debug, Clone, PartialEq)]
pub struct StructFmt {
    // NOTE(review): the `Vec<FieldFmt>` item type was lost in extraction and
    // has been reconstructed — confirm.
    pub fields: Vec<FieldFmt>,
}

impl StructFmt {
    /// Linear lookup of a field by its C identifier.
    // NOTE(review): the generic parameter and its bounds were stripped by
    // extraction and are reconstructed — confirm against the original.
    pub fn field_by_name<Q>(&self, name: &Q) -> Option<&FieldFmt>
    where
        Q: ?Sized,
        Identifier: Borrow<Q> + PartialEq<Q>,
    {
        self.fields
            .iter()
            .find(|&field| &field.declaration.identifier == name.borrow())
    }
}

/// Check a C type parsed from an event format against the size/signedness
/// metadata reported by the kernel, and fix it up where the C declaration
/// alone was ambiguous.
///
/// Returns an error if the declared type's intrinsic size or signedness
/// contradicts what the kernel reported for the field.
// NOTE(review): the `Result<Type, HeaderError>` return type was lost in
// extraction and has been reconstructed.
fn fixup_c_type(
    typ: Type,
    size: MemSize,
    signedness: Signedness,
    abi: &Abi,
) -> Result<Type, HeaderError> {
    let inferred_size = typ.size(abi).ok();
    let inferred_signedness = typ.arith_info().map(|info| info.signedness());

    // If the declared type has an intrinsic size, it must agree with the
    // size the kernel reported for the field.
    if let Some(inferred_size) = inferred_size {
        if let Ok(size) = size.try_into() {
            if inferred_size != size {
                return Err(HeaderError::InvalidTypeSize {
                    typ,
                    inferred_size,
                    size,
                });
            }
        }
    }

    if let Some(inferred_signedness) = inferred_signedness {
        if inferred_signedness != signedness {
            return Err(HeaderError::InvalidTypeSign {
                typ,
                inferred_signedness,
                signedness,
            });
        }
    }

    // Primarily fixup Enum where the underlying type is unknown to the C
    // parser, but here we have a chance of actually fixing it up since we
    // usually know the size and signedness of the type.
+ fn fixup(typ: Type, size: Option, signedness: Signedness) -> Type { + match typ { + Type::Array(typ, ArrayKind::Fixed(Ok(0))) if size == Some(0) => Type::Array( + Box::new(fixup(*typ, None, signedness)), + ArrayKind::ZeroLength, + ), + Type::Array(typ, ArrayKind::Fixed(array_size)) => { + let item_size = match (size, &array_size) { + (Some(size), Ok(array_size)) => { + let array_size: usize = (*array_size).try_into().unwrap(); + Some(size / array_size) + } + _ => None, + }; + Type::Array( + Box::new(fixup(*typ, item_size, signedness)), + ArrayKind::Fixed(array_size), + ) + } + Type::Array(typ, kind @ ArrayKind::Dynamic(_)) => { + Type::Array(Box::new(fixup(*typ, None, signedness)), kind) + } + + Type::Typedef(typ, id) => Type::Typedef(Box::new(fixup(*typ, size, signedness)), id), + Type::Enum(typ, id) => Type::Enum(Box::new(fixup(*typ, size, signedness)), id), + Type::Unknown => match (size.map(|x| x * 8), signedness) { + (Some(8), Signedness::Unsigned) => Type::U8, + (Some(8), Signedness::Signed) => Type::I8, + + (Some(16), Signedness::Unsigned) => Type::U16, + (Some(16), Signedness::Signed) => Type::I16, + + (Some(32), Signedness::Unsigned) => Type::U32, + (Some(32), Signedness::Signed) => Type::I32, + + (Some(64), Signedness::Unsigned) => Type::U64, + (Some(64), Signedness::Signed) => Type::I64, + _ => Type::Unknown, + }, + typ => typ, + } + } + + Ok(fixup(typ, Some(size), signedness)) +} + +type HeaderNomError<'a> = NomError>; + +#[inline(never)] +fn parse_struct_fmt<'a>( + abi: &Abi, + skip_fixup: bool, + input: &'a [u8], +) -> nom::IResult<&'a [u8], StructFmt, HeaderNomError<'a>> { + terminated( + separated_list0( + char('\n'), + map_res_cut( + preceded( + lexeme(tag(b"field:")), + separated_pair( + is_not(";"), + char(';'), + terminated( + separated_list0( + char(';'), + separated_pair( + preceded(is_a("\t "), is_not("\n:").map(to_str)), + char(':'), + is_not(";").map(to_str), + ), + ), + char(';'), + ), + ), + ), + move |(declaration, props)| { + let 
props = BTreeMap::from_iter(props); + macro_rules! get { + ($name:expr) => { + props + .get($name) + .expect(concat!("Expected field property", $name)) + .parse() + .expect("Failed to parse field property value") + }; + } + + let (_, mut declaration) = CGrammar::apply_rule( + all_consuming(CGrammar::declaration()), + declaration, + &CGrammarCtx::new(abi), + ) + .map_err(|_| HeaderError::InvalidDeclaration)?; + + let signedness = { + let signed: u8 = get!("signed"); + if signed > 0 { + Signedness::Signed + } else { + Signedness::Unsigned + } + }; + let size = get!("size"); + if !skip_fixup { + declaration.typ = fixup_c_type(declaration.typ, size, signedness, abi)?; + } + + Ok(FieldFmt { + declaration, + offset: get!("offset"), + size, + decoder: Arc::new(()), + }) + }, + ), + ), + opt(char('\n')), + ) + .map(|fields| StructFmt { fields }) + .parse(input) +} + +#[inline(never)] +fn parse_header_event(input: &[u8]) -> nom::IResult<&[u8], (), HeaderNomError<'_>> { + map_res( + preceded( + opt(lexeme(preceded(char('#'), many0(is_not("\n"))))), + fold_many0( + terminated( + alt(( + separated_pair( + |input| match identifier::<_, ()>().parse(input) { + Ok(id) => Ok(id), + Err(_) => error(input, HeaderError::InvalidCIdentifier), + }, + char(':'), + delimited( + opt(pair(lexeme(tag("type")), lexeme(tag("==")))), + lexeme(txt_u64), + opt(lexeme(tag("bits"))), + ), + ) + .map(|(id, n)| match (id.as_ref(), n) { + ("type_len", 5) => Ok(()), + ("type_len", x) => Err(HeaderError::InvalidEventHeader { + field: "type_len".into(), + value: x.to_string(), + }), + + ("time_delta", 27) => Ok(()), + ("time_delta", x) => Err(HeaderError::InvalidEventHeader { + field: "time_delta".into(), + value: x.to_string(), + }), + + ("array", 32) => Ok(()), + ("array", x) => Err(HeaderError::InvalidEventHeader { + field: "array".into(), + value: x.to_string(), + }), + + ("padding", 29) => Ok(()), + ("padding", x) => Err(HeaderError::InvalidEventHeader { + field: "padding".into(), + value: 
x.to_string(), + }), + + ("time_extend", 30) => Ok(()), + ("time_extend", x) => Err(HeaderError::InvalidEventHeader { + field: "time_extend".into(), + value: x.to_string(), + }), + + ("time_stamp", 31) => Ok(()), + ("time_stamp", x) => Err(HeaderError::InvalidEventHeader { + field: "time_stamp".into(), + value: x.to_string(), + }), + _ => Ok(()), + }), + preceded( + tuple(( + lexeme(tag("data")), + lexeme(tag("max")), + lexeme(tag("type_len")), + lexeme(tag("==")), + )), + lexeme(txt_u64).map(|bits| match bits { + 28 => Ok(()), + x => Err(HeaderError::InvalidEventHeader { + field: "data max type_len".into(), + value: x.to_string(), + }), + }), + ), + )), + opt(many0(char('\n'))), + ), + || Ok(()), + |acc, i| match acc { + Ok(..) => i, + Err(..) => acc, + }, + ), + ), + // Simplify the return type by "promoting" validation errors into a parse + // error. + |res| res, + ) + .parse(input) +} + +#[derive(Debug, Clone)] +pub struct EventDesc { + pub name: String, + pub id: EventId, + // Use a OnceCell so that we can mutate it in place in order to lazily parse + // the format and memoize the result. 
+ fmt: OnceCell>, + raw_fmt: Vec, + header: Option>, +} + +#[derive(Clone)] +pub struct EventFmt { + struct_fmt: Result, + print_fmt: Result, + print_args: Result, CompileError>>, HeaderError>, +} + +impl EventFmt { + pub fn struct_fmt(&self) -> Result<&StructFmt, HeaderError> { + match &self.struct_fmt { + Ok(x) => Ok(x), + Err(err) => Err(err.clone()), + } + } + + pub fn print_fmt(&self) -> Result<&PrintFmtStr, HeaderError> { + match &self.print_fmt { + Ok(x) => Ok(x), + Err(err) => Err(err.clone()), + } + } + + pub fn print_args(&self) -> Result<&[Result, CompileError>], HeaderError> { + match &self.print_args { + Ok(x) => Ok(x), + Err(err) => Err(err.clone()), + } + } +} + +impl Debug for EventFmt { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + f.debug_struct("EventFmt") + .field("struct_fmt", &self.struct_fmt) + .field("print_fmt", &self.print_fmt) + .finish_non_exhaustive() + } +} + +impl PartialEq for EventFmt { + fn eq(&self, other: &Self) -> bool { + self.struct_fmt == other.struct_fmt && self.print_fmt == other.print_fmt + } +} + +impl EventDesc { + // Allow for errors in case we decide to drop the raw_fmt once it has been parsed + #[inline] + pub fn raw_fmt(&self) -> Result<&[u8], HeaderError> { + Ok(&self.raw_fmt) + } + + #[inline] + pub fn event_fmt(&self) -> Result<&EventFmt, HeaderError> { + match self + .fmt + .get_or_init(|| parse_event_fmt(self.header(), &self.name, &self.raw_fmt)) + { + Ok(x) => Ok(x), + Err(err) => Err(err.clone()), + } + } + + // This method is private as the header that we get through here is not complete and we might + // re-implement what depends on a full header to only depend on some bits. 
It can also panic in + // some circumstances + #[inline] + fn header(&self) -> &Header { + // Not having the parent link can happen if: + // * We try to use the EventDesc before Header::fixup_event_descs() was called + // * We try to use an EventDesc obtained from a copy of the header before the fixup was + // done. This can happen if soemone tries to access EventDesc attached to the header that + // was stored in an Rc<> to provide a parent to EventDesc attached to the primary Header + self.header + .as_ref() + .expect("EventDesc does not have a parent Header link") + } +} + +impl PartialEq for EventDesc { + fn eq(&self, other: &Self) -> bool { + self.id == other.id && self.name == other.name && self.raw_fmt == other.raw_fmt + } +} +impl Eq for EventDesc {} + +impl Hash for EventDesc { + fn hash(&self, state: &mut H) + where + H: Hasher, + { + self.id.hash(state); + self.name.hash(state); + self.raw_fmt.hash(state); + } +} + +#[inline(never)] +fn parse_event_fmt<'a>( + header: &'a Header, + name: &'a str, + input: &[u8], +) -> Result { + context( + "event description", + tuple(( + context( + "event format", + preceded( + pair(lexeme(tag("format:")), multispace0), + |input| parse_struct_fmt(header.kernel_abi(), false, input), + ) + .map(|mut fmt| { + for field in &mut fmt.fields { + field.decoder = + Arc::new(match field.declaration.typ.make_decoder(header) { + Ok(parser) => parser, + Err(err) => { + Box::new(closure!( + ( + for<'d> Fn( + &'d [u8], + &'d [u8], + &'d Header, + &'d ScratchAlloc, + ) + -> Result, BufferError> + ), + move |_, _, _, _| Err(err.clone().into()) + )) + } + }); + } + fmt + }), + ), + context( + "event print fmt", + preceded( + pair(multispace0, lexeme(tag("print fmt:"))), + all_consuming(lexeme(map_res_cut( + move |input| { + match CGrammar::apply_rule( + CGrammar::expr(), + input, + &CGrammarCtx::new(header.kernel_abi()), + ) { + Ok((remaining, x)) => { + let consumed = input.len() - remaining.len(); + Ok((&input[consumed..], x)) + }, + 
Err(err) => failure( + input, + PrintFmtError::CParseError(Box::new(err)).into(), + ), + } + }, + |expr| { + let (fmt, exprs) = match expr { + Expr::CommaExpr(vec) => { + let mut iter = vec.into_iter(); + let s = iter.next(); + let args: Vec = iter.collect(); + + let s = match s { + Some(Expr::StringLiteral(s)) => Ok(s), + Some(expr) => Err(PrintFmtError::NotAStringLiteral(expr)), + None => panic!("CommaExpr is expected to contain at least one expression"), + }?; + Ok((s, args)) + } + Expr::StringLiteral(s) => Ok((s, vec![])), + expr => Err(PrintFmtError::NotAStringLiteral(expr)), + }?; + let fmt = parse_print_fmt(header, fmt.as_bytes()); + Ok((fmt, exprs)) + }, + ))), + ), + ), + )) + .map(|(struct_fmt, (print_fmt, print_args))| { + struct CEnv<'h> { + header: &'h Header, + struct_fmt: &'h StructFmt, + scratch: ScratchAlloc, + } + + impl<'h> EvalEnv<'h> for CEnv<'h> { + #[inline] + fn deref_static(&self, addr: u64) -> Result, EvalError> { + self.header.deref_static(addr) + } + + #[inline] + fn scratch(&self) -> &ScratchAlloc { + &self.scratch + } + + fn header(&self) -> Result<&Header, EvalError> { + Ok(self.header) + } + + fn event_data(&self) -> Result<&[u8], EvalError> { + Err(EvalError::NoEventData) + } + } + + impl<'ce> CompileEnv<'ce> for CEnv<'ce> { + #[inline] + fn field_typ(&self, id: &str) -> Result { + for field in &self.struct_fmt.fields { + if field.declaration.identifier == id { + return Ok(field.declaration.typ.clone()); + } + } + Err(CompileError::UnknownField(id.into())) + } + + #[inline] + fn field_getter( + &self, + id: &str, + ) -> Result< + Box, + CompileError, + > { + for field in &self.struct_fmt.fields { + if field.declaration.identifier == id { + let decoder = field.decoder.clone(); + let offset = field.offset; + let end = offset + field.size; + + fn make_box(f: F) -> Box + where + F: for<'ee, 'eeref> Fn(&'eeref (dyn EvalEnv<'ee> + 'eeref)) -> Result, EvalError> + Send + Sync + 'static { + Box::new(f) + } + + return Ok(make_box(move |env: 
&dyn EvalEnv<'_>| { + let event_data = env.event_data()?; + let header = env.header()?; + let field_data = &event_data[offset..end]; + Ok(decoder.decode( + event_data, + field_data, + header, + env.scratch(), + )?) + })); + } + } + Err(CompileError::UnknownField(id.into())) + } + } + + let env = CEnv { + header, + struct_fmt: &struct_fmt, + scratch: ScratchAlloc::new(), + }; + + // bprint format is a lie, so we deal with it by replacing + // REC->fmt by an extension function call that formats REC->fmt + // and REC->buf into a string. + // + // We only do the fixup if the format string contains a "%s" + // called with a REC->fmt argument, so we stay + // forward-compatible with future fixes. + // https://bugzilla.kernel.org/show_bug.cgi?id=217357 + let fixup_arg = move |(expr, atom): (Expr, _)| { + match atom { + Some(&PrintAtom::Variable {print_spec: PrintSpecifier::Str, ..}) if name == "bprint" && expr.is_record_field("fmt") => { + macro_rules! compiler { + ($char_typ:expr) => { + ExtensionMacroCallCompiler { + ret_typ: ExtensionMacroCallType::Type(Type::Pointer(Box::new($char_typ))), + compiler: Arc::new(|cenv: &dyn CompileEnv, _abi: &_| { + let fmt = cenv.field_getter("fmt")?; + let buf = cenv.field_getter("buf")?; + Ok(new_dyn_evaluator({ + // Cache the parsed format string + let fmt_map: RwLock> = RwLock::new(BTreeMap::new()); + move |env| { + let header = env.header()?; + // Get the format string and its parsed PrintFmtStr + let fmt_addr = match fmt.eval(env)? 
{ + Value::U64Scalar(addr) => Ok(addr), + Value::I64Scalar(addr) => Ok(addr as u64), + val => Err(EvalError::IllegalType(val.into_static().ok())) + }?; + + let mut _fmt_map_read; + let mut _fmt_map_write; + let fmt = { + _fmt_map_read = fmt_map.read().unwrap(); + match _fmt_map_read.get(&fmt_addr) { + Some(fmt) => fmt, + None => { + drop(_fmt_map_read); + let fmt = env.deref_static(fmt_addr)?; + let fmt = match fmt.to_str() { + Some(s) => Ok(s), + None => Err(EvalError::IllegalType(fmt.into_static().ok())), + }?; + + let fmt = parse_print_fmt(header, fmt.as_bytes())?; + _fmt_map_write = fmt_map.write().unwrap(); + _fmt_map_write.entry(fmt_addr).or_insert(fmt) + } + } + + }; + + // Get the vbin buffer + let buf = buf.eval(env)?; + + match buf { + Value::U32Array(array) => { + let array: &[u32] = array.deref(); + + let mut vec = ScratchVec::new_in(env.scratch()); + let mut writer = StringWriter::new(&mut vec); + + // We don't attempt + // to use + // Str::new_procedural() + // as there are too + // many ways the + // interpolation + // could fail, and + // procedural + // strings cannot + // fail other than + // panicking + fmt.interpolate_vbin(header, env, &mut writer, array)?; + // This "leaks" the + // ScratchVec, but + // it will be freed + // after processing + // that event by + // whoever created + // "env" + Ok(Value::U8Array(Array::Borrowed(vec.leak()))) + } + val => Err(EvalError::IllegalType(val.into_static().ok())), + } + } + })) + }) + } + } + } + + let char_typ = header.kernel_abi().char_typ(); + + let compiler = compiler!(char_typ.clone()); + let desc = ExtensionMacroDesc::new_function_like( + "__format_vbin_printf".into(), + Box::new(move |input| { + Ok(( + input, + compiler!(char_typ.clone()), + )) + }) + ); + + Expr::ExtensionMacroCall(ExtensionMacroCall { + args: "REC->fmt, REC->buf, __get_zero_length_array_len(REC->buf)".into(), + desc: Arc::new(desc), + compiler + }) + }, + _ => expr + } + }; + + let print_args = match &print_fmt { + 
Ok(print_fmt) => { + Ok(PrintAtom::zip_atoms( + print_args.into_iter(), + print_fmt.atoms.iter() + ) + .into_iter() + .map(fixup_arg) + .map(|expr| Ok( + Arc::from(expr.compile(&env, header.kernel_abi())?)) + ) + .collect()) + } + Err(err) => Err(err.clone()), + }; + let print_fmt = print_fmt.map_err(Into::into); + let print_args = print_args.map_err(Into::into); + // TODO: We could possibly exploit e.g. __print_symbolic() to fixup the enum + // variants, based on the print_fmt, but: + // 1. The strings are arbitrary and cannot be expected to be valid identifier + // matching any original enum. That is probably fine but might be a bit + // surprising. + // 2. We cannot count on the strings described by __print_symbolic() cover all the + // cases. + EventFmt { + // For now, we just fail if we cannot at least parse the StructFmt, since the + // resulting EventFmt would be quite useless. This might change in the future + // if necessary. + struct_fmt: Ok(struct_fmt), + print_fmt, + print_args, + } + }), + ) + .parse_finish(input) +} + +#[inline(never)] +fn parse_event_desc(input: &[u8]) -> nom::IResult<&[u8], EventDesc, HeaderNomError<'_>> { + context( + "event description", + map_res_cut( + tuple(( + context( + "event name", + preceded( + lexeme(tag("name:")), + lexeme(terminated(is_not("\n"), char('\n'))), + ), + ), + context("event ID", preceded(lexeme(tag("ID:")), lexeme(txt_u16))), + context("remainder", rest), + )), + |(name, id, fmt)| { + Ok(EventDesc { + name: StdString::from_utf8_lossy(name).into(), + id, + fmt: OnceCell::new(), + // Store the unparsed content, as parsing is costly and only + // a handful of events will typically be actually used in + // the trace. 
+ raw_fmt: fmt.to_vec(), + // Will be fixed up later + header: None, + }) + }, + ), + ) + .parse(input) +} + +#[inline(never)] +fn parse_kallsyms( + input: &[u8], +) -> nom::IResult<&[u8], BTreeMap, HeaderNomError<'_>> { + context("kallsyms", move |input| { + let line = terminated( + separated_pair( + hex_u64, + delimited(multispace1, is_not(" \t"), multispace1), + map_res_cut( + pair( + is_not("\t\n"), + // The symbol name can be followed by \t[module_name], + // so we consume the \t in between to provide cleaner + // output. + opt(preceded(is_a(" \t"), is_not("\n"))), + ), + |(name, module)| match from_utf8(name) { + // Filter-out symbols starting with "$" as they are probably just mapping + // symbols that can sometimes have the same value as real function symbols, + // thereby breaking the output. (see "ELF for the Arm© 64-bit Architecture + // (AArch64)" document). + // Also filter out all the compiler-generated symbols, e.g. ones that have + // a suffix as a result of some optimization pass. 
+ Ok(name) if is_identifier(name) => Ok(Some(match module.map(from_utf8) { + Some(Ok(module)) => { + let mut full: SymbolName = name.into(); + full.push_str(" "); + full.push_str(module); + full + } + _ => name.into(), + })), + Ok(_) => Ok(None), + Err(err) => Err(HeaderError::DecodeUtf8(err.to_string())), + }, + ), + ), + char('\n'), + ); + + let mut it = iterator(input, line); + let parsed = it + .filter_map(|item| match item { + (addr, Some(name)) => Some(Ok((addr, name))), + _ => None, + }) + .collect::, _>>()?; + let (input, _) = it.finish()?; + Ok((input, parsed)) + }) + .parse(input) +} + +#[inline(never)] +fn parse_str_table( + input: &[u8], +) -> nom::IResult<&[u8], BTreeMap, HeaderNomError<'_>> { + context("trace_printk fmt", move |input| { + let line = separated_pair( + preceded(tag("0x"), hex_u64), + lexeme(char(':')), + move |input| { + // Gather the line using a fast parser, otherwise invoking the C + // parser on the whole input will allocate large amounts of + // memory for the packrat state, out of which only the first few + // tens of positions will be used. This can take seconds in + // debug builds. 
+ let (input, line) = terminated(is_not("\n"), char('\n')).parse(input)?; + let res: Result<(_, _), NomError<_, _>> = + all_consuming(string_literal()).parse(line).finish(); + match res { + Ok((_, Expr::StringLiteral(s))) => Ok((input, s)), + Ok((_, expr)) => failure( + input, + PrintFmtError::NotAStringLiteral(expr).into(), + ), + Err(err) => err.into_external(input, |data| { + PrintFmtError::CParseError( + Box::new(data), + ).into() + }), + } + }, + ); + let mut it = iterator(input, line); + let parsed = it.collect::>(); + let (input, _) = it.finish()?; + Ok((input, parsed)) + }) + .parse(input) +} + +#[inline(never)] +fn parse_pid_comms(input: &[u8]) -> nom::IResult<&[u8], BTreeMap, HeaderNomError<'_>> { + context("PID map", move |input| { + let line = separated_pair( + txt_u32, + multispace1, + map_res_cut(lexeme(is_not("\n")), |x| match from_utf8(x) { + Ok(s) => Ok(s.into()), + Err(err) => Err(HeaderError::DecodeUtf8(err.to_string())), + }), + ); + let mut it = iterator(input, line); + let parsed = it.collect::>(); + let (input, _) = it.finish()?; + Ok((input, parsed)) + }) + .parse(input) +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum HeaderError { + #[error("Bad magic found")] + BadMagic, + + #[error("Could not decode UTF-8 string: {0}")] + DecodeUtf8(StdString), + + #[error("Could not parse file format version: {0}")] + InvalidVersion(StdString), + + #[error("Expected 0 or 1 for endianness, got: {0}")] + InvalidEndianness(u8), + + #[error("Could not parse C declaration")] + InvalidDeclaration, + + #[error("Could not parse C identifier")] + InvalidCIdentifier, + + #[error( + "Size of type \"{typ:?}\" was inferred to be {inferred_size} but kernel reported {size}" + )] + InvalidTypeSize { + typ: Type, + inferred_size: u64, + size: u64, + }, + + #[error("Sign of type \"{typ:?}\" was inferred to be {inferred_signedness:?} but kernel reported {signedness}")] + InvalidTypeSign { + typ: Type, + inferred_signedness: 
Signedness, + signedness: Signedness, + }, + + #[error("Invalid long size: {0}")] + InvalidLongSize(MemSize), + + #[error("Invalid option in header")] + InvalidOption, + + #[error("Compression codec not supported: {0}")] + UnsupportedCompressionCodec(StdString), + + #[error("Compressed section was found but header specified no compression codec")] + CompressedSectionWithoutCodec, + + #[error("Expected section ID {expected} but found {found}")] + UnexpectedSection { + expected: SectionId, + found: SectionId, + }, + + #[error("Expected header page start")] + ExpectedHeaderPage, + + #[error("Could not load section as it is too large: {0} bytes")] + SectionTooLarge(u64), + + #[error("Could not load header page size as it is too large: {0} bytes")] + PageHeaderSizeTooLarge(FileSize), + + #[error("Expected header event start")] + ExpectedHeaderEvent, + + #[error("Could not load header event as it is too large: {0} bytes")] + HeaderEventSizeTooLarge(FileSize), + + #[error("Could not load event format as it is too large: {0} bytes")] + EventDescTooLarge(u64), + + #[error("Could not load kallsyms as it is too large: {0} bytes")] + KallsymsTooLarge(u64), + + #[error("Could not load trace printk format strings table as it is too large: {0} bytes")] + StrTableTooLarge(u64), + + #[error("Unexpected event header value \"{value}\" for field \"{field}\"")] + InvalidEventHeader { field: StdString, value: StdString }, + + #[error("Could not find the kernel long size")] + LongSizeNotFound, + + #[error("Could not find the kernel char signedness")] + CharSignednessNotFound, + + #[error("Data format not supported")] + UnsupportedDataFmt, + + #[error("Error while loading data: {0}")] + IoError(Box), + + #[error("Could not parse header: {0}")] + ParseError(Box), + + #[error("Error while parsing printk format: {0}")] + PrintFmtError(Box), +} +convert_err_impl!(io::ErrorKind, IoError, HeaderError); +convert_err_impl!(PrintFmtError, PrintFmtError, HeaderError); + +impl From for HeaderError { 
+ fn from(err: IoError) -> HeaderError { + err.kind().into() + } +} + +impl, I2: AsRef<[u8]>> FromParseError> + for HeaderError +{ + fn from_parse_error(input: I, err: &nom::error::VerboseError) -> Self { + HeaderError::ParseError(Box::new(VerboseParseError::new(input, err))) + } +} + +impl> FromParseError for HeaderError { + fn from_parse_error(input: I, _err: &()) -> Self { + HeaderError::ParseError(Box::new(VerboseParseError::from_input(input))) + } +} + +#[derive(Debug, Clone)] +pub enum Options { + // v6 only, defines a non-top-level instance + Instance { + name: String, + offset: FileOffset, + }, + + // v7 only, fully defines the location of a single ring buffer + Buffer { + cpu: CPU, + name: String, + offset: FileOffset, + size: FileSize, + decomp: Option, + page_size: MemSize, + }, + + // TODO: parse + BufferText(), + + HeaderInfoLoc(FileOffset), + FtraceEventsLoc(FileOffset), + EventFormatsLoc(FileOffset), + KallsymsLoc(FileOffset), + PrintkLoc(FileOffset), + CmdLinesLoc(FileOffset), + TimeOffset(TimeOffset), + + TSC2NSec { + multiplier: u32, + shift: u32, + offset: u64, + }, + + // TODO: parse + Date(String), + + // TODO: parse + CpuStat { + cpu: CPU, + stat: String, + }, + + // TODO: parse + TraceClock(String), + // TODO: parse + Uname(String), + // TODO: parse + Hook(String), + + // TODO: parse + CpuCount(CPU), + + // TODO: parse + Version(String), + + // TODO: parse + ProcMaps(String), + + TraceId(u64), + + // TODO: parse + TimeShift(), + + // TODO: parse + Guest(), + + Unknown { + typ: u16, + data: Vec, + }, +} + +fn shared_decode_option(_abi: &Abi, option_type: u16, data: &[u8]) -> Result { + Ok(Options::Unknown { + typ: option_type, + data: data.to_vec(), + }) +} + +fn v6_parse_options(abi: &Abi, input: &mut I) -> Result, HeaderError> +where + I: BorrowingRead, +{ + let endianness = abi.endianness; + + fn parse_date(date: &str) -> Result { + let (date, sign) = if date.starts_with('-') { + (date.trim_start_matches('-'), -1) + } else { + (date, 1) + 
        };

        // Accumulated offset; parsed below honouring C-style base prefixes.
        let mut offset: TimeOffset = 0;
        // Try hexadecimal ("0x"/"0X"), then octal ("0"), then decimal,
        // mimicking strtol()-style prefix handling. The first matching
        // prefix wins.
        for (prefix, base) in [
            (Some("0x"), 16),
            (Some("0X"), 16),
            (Some("0"), 8),
            (None, 10),
        ] {
            let (match_, date) = match prefix {
                Some(prefix) => (date.starts_with(prefix), date.trim_start_matches(prefix)),
                None => (true, date),
            };
            if match_ {
                offset = TimeOffset::from_str_radix(date, base)
                    .map_err(|_| HeaderError::InvalidOption)?;
                break;
            }
        }

        // Re-apply the sign that was stripped before prefix detection.
        let offset = offset * sign;
        Ok(offset)
    }

    let mut options = Vec::new();
    loop {
        // Option records are (u16 type, u32 size, payload); a type of 0
        // terminates the list.
        let option_type: u16 = input.read_int(endianness)?;
        if option_type == 0 {
            break;
        }
        let option_size: u32 = input.read_int(endianness)?;
        let option_data = input.read(option_size.try_into().unwrap())?;
        options.push(match option_type {
            // BUFFER: id 3, size vary
            3 => {
                // Payload: u64 file offset followed by a NUL-terminated
                // instance name.
                let (option_data, offset) = abi.parse_u64(option_data)?;
                let name = CStr::from_bytes_with_nul(option_data)
                    .map_err(|_| HeaderError::InvalidOption)?
                    .to_str()
                    .map_err(|_| HeaderError::InvalidOption)?;
                Options::Instance {
                    name: name.into(),
                    offset,
                }
            }
            // OFFSET: id 7, size vary
            7 => {
                // Payload: a NUL-terminated textual time offset, handed to
                // parse_date() above.
                let date = CStr::from_bytes_with_nul(option_data)
                    .map_err(|_| HeaderError::InvalidOption)?
                    .to_str()
                    .map_err(|_| HeaderError::InvalidOption)?;

                Options::TimeOffset(parse_date(date)?)
+ } + // TRACEID: id 11, size 8 + 11 => { + let (_, id) = abi.parse_u64(option_data)?; + Options::TraceId(id) + } + // TSC2NSEC: id 14, size 16 + 14 => { + let (option_data, multiplier) = abi.parse_u32(option_data)?; + let (option_data, shift) = abi.parse_u32(option_data)?; + let (_, offset) = abi.parse_u64(option_data)?; + + Options::TSC2NSec { + multiplier, + shift, + offset, + } + } + _ => shared_decode_option(abi, option_type, option_data)?, + }); + } + Ok(options) +} + +fn v7_parse_options( + abi: &Abi, + decomp: &mut Option, + mut input: I, +) -> Result<(I, Vec), HeaderError> +where + I: BorrowingRead, +{ + let mut options = Vec::new(); + let mut section_decomp = decomp.clone(); + + loop { + let section = { + let (id, options) = v7_section(abi, &mut section_decomp, &mut input)?; + if id != 0 { + Err(HeaderError::UnexpectedSection { + expected: 0, + found: id, + }) + } else { + Ok(options) + } + }?; + let mut section_data = section.deref(); + + macro_rules! read { + ($meth:ident, $update:ident) => {{ + let (update, x) = abi.$meth($update)?; + #[allow(unused_assignments)] + { + $update = update; + } + x + }}; + } + + macro_rules! read_null_terminated { + ($update:ident) => {{ + let idx = $update + .into_iter() + .position(|x| *x == 0) + .ok_or(HeaderError::InvalidOption)?; + let name = CStr::from_bytes_with_nul(&$update[..=idx]) + .map_err(|_| HeaderError::InvalidOption)? 
+ .to_str() + .map_err(|_| HeaderError::InvalidOption)?; + $update = &$update[idx + 1..]; + name + }}; + } + + loop { + let option_type = read!(parse_u16, section_data); + let option_size = read!(parse_u32, section_data); + + match option_type { + 0 => { + let next_offset = read!(parse_u64, section_data); + + if next_offset == 0 { + drop(section); + return Ok((input, options)); + } else { + drop(section); + input = input.abs_seek(next_offset, None)?; + break; + } + } + option_type => { + let (mut option_data, section_data_) = + section_data.split_at(option_size.try_into().unwrap()); + section_data = section_data_; + + match option_type { + 3 => { + let _offset = read!(parse_u64, option_data); + let name = read_null_terminated!(option_data); + let _clock = read_null_terminated!(option_data); + let page_size = read!(parse_u32, option_data); + let nr_cpus = read!(parse_u32, option_data); + + for _i in 0..nr_cpus { + let cpu = read!(parse_u32, option_data); + let cpu_offset = read!(parse_u64, option_data); + let cpu_size = read!(parse_u64, option_data); + + options.push(Options::Buffer { + cpu, + name: name.into(), + offset: cpu_offset, + size: cpu_size, + decomp: decomp.clone(), + page_size: page_size.try_into().unwrap(), + }); + } + } + + 16 => { + options.push(Options::HeaderInfoLoc(read!(parse_u64, option_data))); + } + 17 => { + options.push(Options::FtraceEventsLoc(read!(parse_u64, option_data))); + } + 18 => { + options.push(Options::EventFormatsLoc(read!(parse_u64, option_data))); + } + 19 => { + options.push(Options::KallsymsLoc(read!(parse_u64, option_data))); + } + 20 => { + options.push(Options::PrintkLoc(read!(parse_u64, option_data))); + } + 21 => { + options.push(Options::CmdLinesLoc(read!(parse_u64, option_data))); + } + _ => options.push(shared_decode_option(abi, option_type, option_data)?), + }; + } + } + } + } +} + +pub fn header(input: &mut I) -> Result +where + I: BorrowingRead, +{ + input.read_tag(b"\x17\x08\x44tracing", HeaderError::BadMagic)??; 
+ + let version: u64 = { + let version = input.read_null_terminated()?; + let version = from_utf8(version).map_err(|err| HeaderError::DecodeUtf8(err.to_string()))?; + version + .parse() + .map_err(|_| HeaderError::InvalidVersion(version.into())) + }?; + + let mut header = match version { + 6 => { + let (abi, page_size) = header_prefix(input)?; + v6_header(abi, input, page_size) + } + 7 => { + let (abi, page_size) = header_prefix(input)?; + v7_header(abi, input, page_size) + } + version => Err(HeaderError::InvalidVersion(version.to_string())), + }?; + header.fixup_event_descs(); + Ok(header) +} + +fn header_prefix(input: &mut I) -> Result<(Abi, FileSize), HeaderError> +where + I: BorrowingRead, +{ + let endianness: u8 = input.read_int(Endianness::Little)?; + let endianness = match endianness { + 0 => Ok(Endianness::Little), + 1 => Ok(Endianness::Big), + x => Err(HeaderError::InvalidEndianness(x)), + }?; + + let abi = Abi { + // This should not be used until it's fixed to the correct value. We + // don't use an Option as this would affect every downstream + // consumer for the benefit of just one function. + long_size: LongSize::Bits64, + char_signedness: Signedness::Unsigned, + endianness, + }; + + let _long_size: u8 = input.read_int(endianness)?; + let page_size: u32 = input.read_int(endianness)?; + let page_size: FileSize = page_size.into(); + + Ok((abi, page_size)) +} + +macro_rules! make_read_int { + ($input:expr, $abi:expr) => { + macro_rules! read_int { + () => { + $input.read_int($abi.endianness) + }; + } + }; +} + +fn v7_header(abi: Abi, input: &mut I, page_size: FileSize) -> Result +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + let comp_codec = { + let s = input.read_null_terminated()?; + from_utf8(s) + .map_err(|err| HeaderError::DecodeUtf8(err.to_string()))? + .to_string() + }; + + let _comp_codec_version = { + let s = input.read_null_terminated()?; + from_utf8(s) + .map_err(|err| HeaderError::DecodeUtf8(err.to_string()))? 
+ .to_string() + }; + + // TODO: does the version of the codec matter at all ? + let mut decomp = match &*comp_codec { + "none" => Ok(None), + "zstd" => Ok(Some(DynDecompressor::new(ZstdDecompressor::new()))), + "zlib" => Ok(Some(DynDecompressor::new(ZlibDecompressor::new()))), + _ => Err(HeaderError::UnsupportedCompressionCodec(comp_codec)), + }?; + + let options_offset: u64 = read_int!()?; + + let input = input.clone_and_seek(options_offset, None)?; + + let (mut input, options) = v7_parse_options(&abi, &mut decomp, input)?; + + let mut event_descs = Vec::new(); + let mut kallsyms = BTreeMap::new(); + let mut str_table = BTreeMap::new(); + let mut pid_comms = BTreeMap::new(); + let nr_cpus = 0; + let mut abi = abi; + + macro_rules! get_section { + ($expected_id:expr, $offset:expr) => {{ + let expected = $expected_id; + input = input.abs_seek($offset, None)?; + let (found, section) = v7_section(&abi, &mut decomp, &mut input)?; + + if expected != found { + Err(HeaderError::UnexpectedSection { expected, found }) + } else { + Ok(()) + }?; + + // Use Rc> so that we can cheaply clone the + // ScratchBox. We need NestedPointer to have Rc> + // implement AsRef. We need to pass the ScratchBox directly + // instead of borrowing otherwise ownership of the actual box is + // tedious (mainly because of the loop implying the need to + // re-assign to the owning variable). 
+ let section = BorrowingCursor::new(NestedPointer::new(Rc::new(section))); + Ok::<_, HeaderError>(section) + }}; + } + + // Ensure we have accurate Abi information before doing anything else + for option in &options { + if let Options::HeaderInfoLoc(offset) = option { + let mut section = get_section!(16, *offset)?; + abi = parse_header_info_section(abi, &mut section)?; + } + } + + for option in &options { + match option { + Options::FtraceEventsLoc(offset) => { + let mut section = get_section!(17, *offset)?; + event_descs.extend(parse_subsystem_event_formats(&abi, &mut section)?); + } + Options::EventFormatsLoc(offset) => { + let mut section = get_section!(18, *offset)?; + event_descs.extend(parse_event_formats_section(&abi, &mut section)?); + } + Options::KallsymsLoc(offset) => { + let mut section = get_section!(19, *offset)?; + kallsyms.extend(parse_kallsyms_section(&abi, &mut section)?); + } + Options::PrintkLoc(offset) => { + let mut section = get_section!(20, *offset)?; + str_table.extend(parse_printk_section(&abi, &mut section)?); + } + Options::CmdLinesLoc(offset) => { + let mut section = get_section!(21, *offset)?; + pid_comms.extend(parse_cmdlines_section(&abi, &mut section)?); + } + _ => (), + } + } + + Ok(Header { + inner: VersionedHeader::V7(HeaderV7 { + kernel_abi: abi, + page_size, + event_descs, + kallsyms, + str_table, + pid_comms, + options, + nr_cpus, + }), + }) +} + +fn v7_section<'a, I, C>( + abi: &Abi, + decomp: &'a mut Option, + input: &'a mut I, +) -> Result<(SectionId, Cow<'a, [u8]>), HeaderError> +where + I: BorrowingRead, + C: Decompressor, +{ + make_read_int!(input, abi); + + let id: u16 = read_int!()?; + let flags: u16 = read_int!()?; + let compressed = flags & 0x1 != 0; + + // Description of the section, stored in a string table + let _string_id: u32 = read_int!()?; + let size: u64 = read_int!()?; + let size: usize = size + .try_into() + .map_err(|_| HeaderError::SectionTooLarge(size))?; + + let data = if compressed { + let 
compressed_size: u32 = read_int!()?; + assert_eq!(compressed_size as usize, size - 8); + + let decompressed_size: u32 = read_int!()?; + let decompressed_size: usize = decompressed_size + .try_into() + .map_err(|_| HeaderError::SectionTooLarge(decompressed_size.into()))?; + let data = input.read(compressed_size.try_into().unwrap())?; + match decomp { + Some(decomp) => decomp.decompress(data, decompressed_size).map(Cow::Owned)?, + None => Err(HeaderError::CompressedSectionWithoutCodec)?, + } + } else { + input.read(size).map(Cow::Borrowed)? + }; + + Ok((id, data)) +} + +fn parse_event_formats_section(abi: &Abi, input: &mut I) -> Result, HeaderError> +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + let mut event_descs: Vec = Vec::new(); + let nr_event_systems: u32 = read_int!()?; + + for _ in 0..nr_event_systems { + let _system_name = from_utf8(input.read_null_terminated()?) + .map_err(|err| HeaderError::DecodeUtf8(err.to_string()))?; + + event_descs.extend(parse_subsystem_event_formats(abi, input)?) + } + Ok(event_descs) +} + +fn parse_subsystem_event_formats(abi: &Abi, input: &mut I) -> Result, HeaderError> +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + let mut event_descs: Vec = Vec::new(); + let nr_event_descs: u32 = read_int!()?; + + for _ in 0..nr_event_descs { + let desc_size: u64 = read_int!()?; + let desc_size: usize = desc_size + .try_into() + .map_err(|_| HeaderError::EventDescTooLarge(desc_size))?; + let desc = input.parse(desc_size, parse_event_desc)??; + event_descs.push(desc); + } + Ok(event_descs) +} + +fn parse_kallsyms_section( + abi: &Abi, + input: &mut I, +) -> Result, HeaderError> +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + let kallsyms_size: u32 = read_int!()?; + let kallsyms_size: usize = kallsyms_size + .try_into() + .map_err(|_| HeaderError::KallsymsTooLarge(kallsyms_size.into()))?; + input.parse(kallsyms_size, parse_kallsyms)? 
+} + +fn parse_printk_section<'a, 'abi: 'a, I>( + abi: &'abi Abi, + input: &'a mut I, +) -> Result, HeaderError> +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + let str_table_size: u32 = read_int!()?; + let str_table_size: usize = str_table_size + .try_into() + .map_err(|_| HeaderError::StrTableTooLarge(str_table_size.into()))?; + input.parse(str_table_size, parse_str_table)? +} + +fn parse_cmdlines_section( + abi: &Abi, + input: &mut I, +) -> Result, HeaderError> +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + let pid_comms_size: u64 = read_int!()?; + let pid_comms_size: usize = pid_comms_size.try_into().unwrap(); + input.parse(pid_comms_size, parse_pid_comms)? +} + +fn parse_header_info_section(abi: Abi, input: &mut I) -> Result +where + I: BorrowingRead, +{ + make_read_int!(input, abi); + + // Header page + input.read_tag(b"header_page\0", HeaderError::ExpectedHeaderPage)??; + let page_header_size_u64: u64 = read_int!()?; + let page_header_size: usize = page_header_size_u64 + .try_into() + .map_err(|_| HeaderError::PageHeaderSizeTooLarge(page_header_size_u64))?; + + let header_fields = input.parse( + page_header_size, + // Disable type check due to: + // https://bugzilla.kernel.org/show_bug.cgi?id=216999 + |input| parse_struct_fmt(&abi, true, input), + )??; + + // Fixup ABI with long_size + let long_size = match header_fields.field_by_name("commit") { + Some(commit) => commit.size.try_into().map_err(HeaderError::InvalidLongSize), + None => Err(HeaderError::LongSizeNotFound), + }?; + + let char_signedness = match header_fields.field_by_name("data") { + Some(data) => match &data.declaration.typ { + Type::U8 => Ok(Signedness::Unsigned), + Type::I8 => Ok(Signedness::Unsigned), + _ => Err(HeaderError::CharSignednessNotFound), + }, + None => Err(HeaderError::CharSignednessNotFound), + }?; + + let abi = Abi { + long_size, + char_signedness, + ..abi + }; + + // Header event + input.read_tag(b"header_event\0", 
HeaderError::ExpectedHeaderEvent)??; + let header_event_size: u64 = read_int!()?; + let header_event_size: usize = header_event_size + .try_into() + .map_err(|_| HeaderError::HeaderEventSizeTooLarge(header_event_size))?; + input.parse(header_event_size, parse_header_event)??; + + Ok(abi) +} + +fn v6_header(abi: Abi, input: &mut I, page_size: FileSize) -> Result +where + I: BorrowingRead, +{ + let endianness = abi.endianness; + let abi = parse_header_info_section(abi, input)?; + + let mut event_descs = parse_subsystem_event_formats(&abi, input)?; + + event_descs.extend(parse_event_formats_section(&abi, input)?); + + let kallsyms = parse_kallsyms_section(&abi, input)?; + let str_table = parse_printk_section(&abi, input)?; + let pid_comms = parse_cmdlines_section(&abi, input)?; + + let nr_cpus: u32 = input.read_int(endianness)?; + + let mut options = Vec::new(); + loop { + let data_kind = input.read_null_terminated()?; + match data_kind { + b"options " => { + options.extend(v6_parse_options(&abi, input)?); + } + kind => { + let kind = kind.to_owned(); + let top_level_buffer_locations = buffer_locations(&kind, nr_cpus, &abi, "", input)?; + break Ok(Header { + inner: VersionedHeader::V6(HeaderV6 { + kernel_abi: abi, + page_size, + event_descs, + kallsyms, + str_table, + pid_comms, + top_level_buffer_locations, + options, + nr_cpus, + }), + }); + } + } + } +} + +pub(crate) fn buffer_locations( + kind: &[u8], + nr_cpus: CPU, + abi: &Abi, + name: &str, + input: &mut I, +) -> Result, HeaderError> +where + I: BorrowingRead, +{ + let endianness = abi.endianness; + match kind { + b"flyrecord" => (0..nr_cpus) + .map(|cpu| { + let offset: u64 = input.read_int(endianness)?; + let size: u64 = input.read_int(endianness)?; + Ok(BufferLocation { + id: BufferId { + cpu, + name: name.into(), + }, + offset, + size, + }) + }) + .collect::, _>>(), + b"latency " => Err(HeaderError::UnsupportedDataFmt), + _ => Err(HeaderError::UnsupportedDataFmt), + } +} + +#[cfg(test)] +mod tests { + + use 
super::*; + use crate::{ + parser::tests::test_parser, + print::{ + PrintAtom, PrintFlags, PrintFmtStr, PrintPrecision, PrintSpecifier, PrintWidth, + VBinSpecifier, + }, + }; + + #[derive(Debug, PartialEq)] + struct EventDescContent { + name: String, + id: EventId, + fmt: EventFmt, + } + + #[test] + fn event_desc_parser_test() { + let abi = Abi { + long_size: LongSize::Bits64, + endianness: Endianness::Little, + char_signedness: Signedness::Unsigned, + }; + let test = |fmt: &[u8], expected: EventDescContent| { + let header = Header { + inner: VersionedHeader::V6(HeaderV6 { + kernel_abi: abi.clone(), + page_size: 4096, + event_descs: Vec::new(), + kallsyms: BTreeMap::new(), + str_table: BTreeMap::new(), + pid_comms: BTreeMap::new(), + options: Vec::new(), + top_level_buffer_locations: Vec::new(), + nr_cpus: 0, + }), + }; + let header = Arc::new(header); + + let parser = parse_event_desc.map(|mut desc| { + desc.header = Some(Arc::clone(&header)); + + EventDescContent { + name: desc.name.clone(), + id: desc.id, + fmt: desc + .event_fmt() + .cloned() + .expect("Error while computing EventFmt"), + } + }); + test_parser(expected, fmt, parser) + }; + + macro_rules! new_variable_atom { + ($($args:expr),* $(,)?) 
=> { + PrintAtom::new_variable( + &abi, + $($args),* + ) + } + } + + test( + b"name: wakeup\nID: 3\nformat:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\tsigned:0;\n\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\tsigned:0;\n\tfield:int common_pid;\toffset:4;\tsize:4;\tsigned:1;\n\n\tfield:unsigned int prev_pid;\toffset:8;\tsize:4;\tsigned:0;\n\tfield:unsigned int next_pid;\toffset:12;\tsize:4;\tsigned:0;\n\tfield:unsigned int next_cpu;\toffset:16;\tsize:4;\tsigned:0;\n\tfield:unsigned char prev_prio;\toffset:20;\tsize:1;\tsigned:0;\n\tfield:unsigned char prev_state;\toffset:21;\tsize:1;\tsigned:0;\n\tfield:unsigned char next_prio;\toffset:22;\tsize:1;\tsigned:0;\n\tfield:unsigned char next_state;\toffset:23;\tsize:1;\tsigned:0;\n\nprint fmt: \"%u:%u:%u ==+ %u:%u:%u \\\" \\t [%03u]\", 55\n", + EventDescContent { + name: "wakeup".into(), + id: 3, + fmt: EventFmt { + print_args: Ok(vec![]), + print_fmt: Ok(PrintFmtStr { + vbin_decoders: OnceCell::new(), + atoms: vec![ + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(":".into()), + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(":".into()), + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(" ==+ ".into()), + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(":".into()), + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(":".into()), + 
new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(" \" \t [".into()), + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::ZeroPad, + PrintWidth::Fixed(3), + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed("]".into()), + + ], + }), + struct_fmt: Ok(StructFmt { + fields: vec![ + FieldFmt { + declaration: Declaration { + identifier: "common_type".into(), + typ: Type::U16, + }, + offset: 0, + size: 2, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "common_flags".into(), + typ: Type::U8, + }, + offset: 2, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "common_preempt_count".into(), + typ: Type::U8, + }, + offset: 3, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "common_pid".into(), + typ: Type::I32, + }, + offset: 4, + size: 4, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "prev_pid".into(), + typ: Type::U32, + }, + offset: 8, + size: 4, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "next_pid".into(), + typ: Type::U32, + }, + offset: 12, + size: 4, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "next_cpu".into(), + typ: Type::U32, + }, + offset: 16, + size: 4, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "prev_prio".into(), + typ: Type::U8, + }, + offset: 20, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "prev_state".into(), + typ: Type::U8, + }, + offset: 21, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "next_prio".into(), + typ: Type::U8, + }, + offset: 22, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: 
Declaration { + identifier: "next_state".into(), + typ: Type::U8, + }, + offset: 23, + size: 1, + decoder: Arc::new(()), + }, + ] + }) + } + } + + ); + + test( + b"name: user_stack\nID: 12\nformat:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\tsigned:0;\n\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\tsigned:0;\n\tfield:int common_pid;\toffset:4;\tsize:4;\tsigned:1;\n\n\tfield:unsigned int tgid;\toffset:8;\tsize:4;\tsigned:0;\n\tfield:unsigned long caller[8];\toffset:16;\tsize:64;\tsigned:0;\n\nprint fmt: \"\\t=> %ps\", (void *)REC->caller[0], (void *)REC->caller[1]\n", + EventDescContent { + name: "user_stack".into(), + id: 12, + fmt: EventFmt { + print_args: Ok(vec![]), + print_fmt: Ok(PrintFmtStr { + vbin_decoders: OnceCell::new(), + atoms: vec![ + PrintAtom::Fixed("\t=> ".into()), + new_variable_atom!( + VBinSpecifier::U64, + PrintSpecifier::Symbol, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + ] + }), + struct_fmt: Ok(StructFmt { + fields: vec![ + FieldFmt { + declaration: Declaration { + identifier: "common_type".into(), + typ: Type::U16, + }, + offset: 0, + size: 2, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "common_flags".into(), + typ: Type::U8, + }, + offset: 2, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "common_preempt_count".into(), + typ: Type::U8, + }, + offset: 3, + size: 1, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "common_pid".into(), + typ: Type::I32, + }, + offset: 4, + size: 4, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "tgid".into(), + typ: Type::U32, + }, + offset: 8, + size: 4, + decoder: Arc::new(()), + }, + FieldFmt { + declaration: Declaration { + identifier: "caller".into(), + typ: Type::Array(Box::new(Type::U64), 
ArrayKind::Fixed(Ok(8))), + }, + offset: 16, + size: 64, + decoder: Arc::new(()), + }, + ] + }) + } + } + ); + } +} diff --git a/tools/trace-parser/traceevent/src/io.rs b/tools/trace-parser/traceevent/src/io.rs new file mode 100644 index 0000000000..b5a00a33cd --- /dev/null +++ b/tools/trace-parser/traceevent/src/io.rs @@ -0,0 +1,953 @@ +use core::{ + fmt::Debug, + mem::size_of, + ops::{Deref, DerefMut, Range}, +}; +use std::{ + io, + io::{BufRead, BufReader, ErrorKind, Read, Seek, SeekFrom}, + os::fd::AsRawFd, + sync::{Arc, Mutex}, +}; + +use nom::IResult; + +use crate::{ + header::{Endianness, FileOffset, FileSize, MemOffset, MemSize}, + parser::{FromParseError, NomError, NomParserExt as _}, + scratch::{OwnedScratchBox, ScratchAlloc, ScratchVec}, +}; + +// We have to know which of MemOffset and FileOffset is the biggest. This module assumes +// MemOffset <= FileOffset, so it converts MemOffset to FileOffset when a common denominator is +// required. +#[inline] +fn mem2file(x: MemOffset) -> FileOffset { + x.try_into() + .expect("Could not convert MemOffset to FileOffset") +} +#[inline] +fn file2mem(x: FileOffset) -> MemOffset { + x.try_into() + .expect("Could not convert FileOffset to MemOffset") +} + +pub trait BorrowingReadCore { + fn clear_buffer(&mut self); + fn read(&mut self, count: MemSize) -> io::Result<&[u8]>; + fn read_null_terminated(&mut self) -> io::Result<&[u8]>; + + fn try_clone(&self) -> io::Result + where + Self: Sized; + fn abs_seek(self, offset: FileOffset, len: Option) -> io::Result + where + Self: Sized; + + #[inline] + fn clone_and_seek(&self, offset: FileOffset, len: Option) -> io::Result + where + Self: Sized, + { + self.try_clone()?.abs_seek(offset, len) + } +} +pub trait BorrowingRead: BorrowingReadCore { + #[inline] + fn parse(&mut self, count: MemSize, mut parser: P) -> io::Result> + where + // Sadly we can't take an for<'b>impl Parser<&'b [u8], _, _> as it + // seems impossible to build real-world parsers complying with for<'b>. 
+ // In practice, the error type needs bounds involving 'b, and there is + // no way to shove these bounds inside the scope of for<'b> (for now at + // least). As a result, 'b needs to be a generic param on the parser + // function and our caller gets to choose the lifetime, not us. + P: for<'b> Fn( + &'b [u8], + ) + -> IResult<&'b [u8], O, NomError>>, + E: for<'b> FromParseError<&'b [u8], nom::error::VerboseError<&'b [u8]>> + Debug, + { + let buf = self.read(count)?; + Ok(parser.parse_finish(buf)) + } + + #[inline] + fn read_int(&mut self, endianness: Endianness) -> io::Result + where + T: DecodeBinary, + { + DecodeBinary::decode(self.read(size_of::())?, endianness) + } + + #[inline] + fn read_tag<'b, T, E>(&mut self, tag: T, or: E) -> io::Result> + where + T: IntoIterator, + T::IntoIter: ExactSizeIterator, + { + let tag = tag.into_iter(); + let buff = self.read(tag.len())?; + let eq = buff.iter().eq(tag); + Ok(if eq { Ok(()) } else { Err(or) }) + } +} + +impl BorrowingRead for T {} + +// impl<'a> BorrowingReadCore for &'a mut dyn BorrowingReadCore { +// #[inline] +// fn clear_buffer(&mut self) { +// (*self).clear_buffer() +// } +// #[inline] +// fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { +// (*self).read(count) +// } +// #[inline] +// fn read_null_terminated(&mut self) -> io::Result<&[u8]> { +// (*self).read_null_terminated() +// } + +// #[inline] +// fn try_clone(&self) -> io::Result { +// (*self).try_clone() +// } +// #[inline] +// fn abs_seek(self, offset: FileOffset, len: Option) -> io::Result { +// (*self).abs_seek(offset, len) +// } + +// #[inline] +// fn parse(&mut self, count: MemSize, mut parser: P) -> io::Result> +// where +// // Sadly we can't take an for<'b>impl Parser<&'b [u8], _, _> as it +// // seems impossible to build real-world parsers complying with for<'b>. +// // In practice, the error type needs bounds involving 'b, and there is +// // no way to shove these bounds inside the scope of for<'b> (for now at +// // least). 
As a result, 'b needs to be a generic param on the parser +// // function and our caller gets to choose the lifetime, not us. +// P: for<'b> Fn( +// &'b [u8], +// ) +// -> IResult<&'b [u8], O, NomError>>, +// E: for<'b> FromParseError<&'b [u8], nom::error::VerboseError<&'b [u8]>> + Debug, +// { +// (*self).parse(count, parser) +// } + +// #[inline] +// fn read_int(&mut self, endianness: Endianness) -> io::Result +// where +// T: DecodeBinary, +// { +// (*self).read_int(endianness) +// } + +// #[inline] +// fn read_tag<'b, T, E>(&mut self, tag: T, or: E) -> io::Result> +// where +// T: IntoIterator, +// T::IntoIter: ExactSizeIterator, +// { +// (*self).read_tag(tag, or) +// } + +// #[inline] +// fn clone_and_seek(&self, offset: FileOffset, len: Option) -> io::Result { +// (*self).clone_and_seek(offset, len) +// } +// } + +/// Newtype wrapper for &[u8] that allows zero-copy operations from +/// [`BorrowingReadCore`]. It is similar to what [`Cursor`] provides to [`Read`]. + +#[derive(Clone)] +pub struct BorrowingCursor { + inner: T, + offset: MemOffset, + len: MemSize, +} + +impl BorrowingCursor +where + T: AsRef<[u8]>, +{ + #[inline] + pub fn new(inner: T) -> Self { + BorrowingCursor { + offset: 0, + len: inner.as_ref().len(), + inner, + } + } + + #[inline] + fn buf(&self) -> &[u8] { + self.inner.as_ref() + } + + #[inline] + fn max_offset(&self) -> MemOffset { + self.offset + self.len + } + + #[inline] + fn range(&self) -> Range { + self.offset..self.max_offset() + } + + #[inline] + fn slice(&self) -> &[u8] { + &self.buf()[self.range()] + } + + #[inline] + fn advance(&mut self, count: MemOffset) -> io::Result<&[u8]> { + if self.offset + count > self.max_offset() { + Err(ErrorKind::UnexpectedEof.into()) + } else { + let range = self.offset..(self.offset + count); + + self.offset += count; + self.len -= count; + + Ok(&self.buf()[range]) + } + } +} + +impl From for BorrowingCursor +where + T: AsRef<[u8]>, +{ + #[inline] + fn from(x: T) -> Self { + 
BorrowingCursor::new(x) + } +} + +impl BorrowingReadCore for BorrowingCursor +where + T: AsRef<[u8]> + Clone, +{ + #[inline] + fn clear_buffer(&mut self) {} + + #[inline] + fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { + self.advance(count) + } + + fn read_null_terminated(&mut self) -> io::Result<&[u8]> { + match self.slice().iter().position(|x| *x == 0) { + Some(end) => { + let range = self.offset..(self.offset + end); + self.advance(end + 1)?; + Ok(&self.buf()[range]) + } + None => { + self.advance(self.len)?; + Err(ErrorKind::UnexpectedEof.into()) + } + } + } + + #[inline] + fn try_clone(&self) -> io::Result { + Ok(self.clone()) + } + + fn abs_seek(self, offset: FileOffset, len: Option) -> io::Result { + #[inline] + fn convert(x: FileOffset) -> io::Result { + x.try_into().map_err(|_| ErrorKind::UnexpectedEof.into()) + } + + let offset = convert(offset)?; + let len = match len { + Some(len) => convert(len), + None => Ok(self.buf().len() - offset), + }?; + + if offset + len > self.buf().len() { + Err(ErrorKind::UnexpectedEof.into()) + } else { + Ok(BorrowingCursor { + inner: self.inner, + offset, + len, + }) + } + } +} + +///////////// +// Memory map +///////////// + +struct Mmap { + // Offset in the file matching the beginning of the memory area + file_offset: FileOffset, + // Current offset in the memory area. + read_offset: MemOffset, + // Length of the mmapped area. This could be smaller than the actual mmapped + // area if the mmap was recycled with an adjusted length. + len: MemSize, + mmap: memmap2::Mmap, +} + +impl Mmap { + unsafe fn new(file: &T, offset: FileOffset, mut len: MemSize) -> io::Result + where + T: AsRawFd, + { + //SAFETY: mmap is inherently unsafe as the memory content could change + // without notice if the backing file is modified. We have to rely on + // the user/OS being nice to us and not do that, or we might crash, + // there is no way around it unfortunately. 
+ let mmap = loop { + let mmap = unsafe { + memmap2::MmapOptions::new() + .offset(offset) + .len(len) + .populate() + .map(file) + }; + match mmap { + Ok(mmap) => break Ok(mmap), + Err(err) => { + len /= 2; + if len == 0 { + break Err(err); + } + } + } + }?; + + let _ = mmap.advise(memmap2::Advice::WillNeed); + let _ = mmap.advise(memmap2::Advice::Sequential); + + Ok(Mmap { + len, + file_offset: offset, + read_offset: 0, + mmap, + }) + } + + #[inline] + fn curr_offset(&self) -> FileOffset { + self.file_offset + mem2file(self.read_offset) + } + + #[inline] + fn max_file_offset(&self) -> FileOffset { + self.file_offset + mem2file(self.len) + } + + #[inline] + fn remaining(&self) -> MemSize { + self.len - self.read_offset + } + + #[inline] + fn read(&mut self, count: MemSize) -> Option<&[u8]> { + if self.remaining() >= count { + let view = &self.mmap[self.read_offset..self.read_offset + count]; + self.read_offset += count; + Some(view) + } else { + None + } + } + + fn abs_seek(mut self, offset: FileOffset, len: FileSize) -> Option { + if offset >= self.file_offset && offset + len <= self.max_file_offset() { + let delta = file2mem(offset - self.file_offset); + self.read_offset = delta; + self.len = delta + len.try_into().unwrap_or(MemSize::MAX); + assert!(self.curr_offset() < self.max_file_offset()); + Some(self) + } else { + None + } + } +} + +impl Deref for Mmap { + type Target = [u8]; + #[inline] + fn deref(&self) -> &Self::Target { + &self.mmap[self.read_offset..self.len] + } +} + +struct MmapFileInner { + // Use a Arc> so that we can clone the reference when creating + // MmapFile + file: Arc>, + // Total file size. This is used to validate new length in order to avoid + // creating mappings that would lead to a SIGBUS. + file_len: FileSize, + + // Logical length of the area we want to mmap. The actual mmapped area at + // any given time might be smaller. + len: FileSize, + // Original offset of the mmapped area. The current read offset is + // maintained in Mmap. 
+ offset: FileOffset, + + mmap: Mmap, +} + +impl MmapFileInner { + #[inline] + fn curr_offset(&self) -> FileOffset { + self.mmap.curr_offset() + } + + #[inline] + fn max_offset(&self) -> FileOffset { + self.offset + self.len + } + + #[inline] + unsafe fn remap(&mut self, offset: FileOffset) -> io::Result + where + T: AsRawFd, + { + // Try to map all the remainder of the file range of interest. + let len = if offset > self.offset { + self.len - (offset - self.offset) + } else { + self.len + (self.offset - offset) + }; + + // Saturate at the max size possible for a mmap + let len: MemSize = len.try_into().unwrap_or(MemSize::MAX); + Mmap::new(self.file.lock().unwrap().deref(), offset, len) + } +} + +pub struct MmapFile { + inner: MmapFileInner, + scratch: ScratchAlloc, +} + +impl MmapFile { + /// # Safety + /// + /// Undefined behavior will happen if the file is modified while it is opened from here, + /// as Rust will not expect the underlying memory to change randomly. + pub unsafe fn new(mut file: T) -> io::Result> + where + T: AsRawFd + Seek, + { + let offset = 0; + let len = file_len(&mut file)?; + let file = Arc::new(Mutex::new(file)); + Self::from_cell(file, offset, None, len) + } + + unsafe fn from_cell( + file: Arc>, + offset: FileOffset, + len: Option, + file_len: FileSize, + ) -> io::Result> + where + T: AsRawFd, + { + let len = len.unwrap_or(file_len - offset); + + // Check that we are not trying to mmap past the end of the + // file, as mmap() will let us do it but we will get SIGBUS upon + // access. 
+ if offset + len > file_len { + Err(ErrorKind::UnexpectedEof.into()) + } else { + let mmap_len = len.try_into().unwrap_or(MemSize::MAX); + let mmap = Mmap::new(file.lock().unwrap().deref(), offset, mmap_len)?; + + Ok(MmapFile { + inner: MmapFileInner { + file, + len, + file_len, + mmap, + offset, + }, + scratch: ScratchAlloc::new(), + }) + } + } +} + +impl BorrowingReadCore for MmapFile +where + T: AsRawFd + Read + Seek, +{ + #[inline] + fn clear_buffer(&mut self) { + self.scratch.reset() + } + + #[inline] + fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { + self.clear_buffer(); + + let max_offset = self.inner.max_offset(); + if self.inner.curr_offset() + mem2file(count) > max_offset { + // Remap to consume all the remaining data to be consistent with + // other BorrowingReadCore implementations. + self.inner.mmap = unsafe { self.inner.remap(max_offset)? }; + Err(ErrorKind::UnexpectedEof.into()) + } else { + // Workaround limitation of NLL borrow checker, see: + // https://docs.rs/polonius-the-crab/latest/polonius_the_crab/ + macro_rules! this { + () => { + // SAFETY: This is safe as long as we do not use "self" + // anymore from the first use of this!(). It has been + // checked with MIRI. + // If and when Polonius borrow checker becomes available, + // that unsafe{} block can simply be replaced by "self". It + // has been tested with RUSTFLAGS="-Z polonius". + { + #[allow(unused_unsafe)] + unsafe { + &mut *(self as *mut Self) + } + } + }; + } + + macro_rules! inner { + () => { + this!().inner + }; + } + + for i in 0..2 { + if let Some(slice) = inner!().mmap.read(count) { + return Ok(slice); + } else if i == 0 { + // Not enough bytes left in the mmap, so we need to remap to catch + // up with the read offset. + inner!().mmap = unsafe { inner!().remap(inner!().curr_offset())? }; + } + } + // Remapping was not enough, we need to fallback on read() + // syscall. 
We discard the mapping we just created, as it's + // useless since it cannot service even the first read. + + let file_offset = inner!().curr_offset(); + let count_u64 = mem2file(count); + + // Remap for future reads after the syscall we are abount to do. + inner!().mmap = unsafe { inner!().remap(file_offset + count_u64)? }; + assert!(inner!().curr_offset() == file_offset + count_u64); + + let mut file = inner!().file.lock().unwrap(); + file.seek(SeekFrom::Start(file_offset))?; + + let scratch = &this!().scratch; + read(file.deref_mut(), count, scratch) + } + } + + #[inline] + fn read_null_terminated(&mut self) -> io::Result<&[u8]> { + self.clear_buffer(); + + let find = |buf: &[u8]| buf.iter().position(|x| *x == 0); + + for i in 0..2 { + match find(&self.inner.mmap) { + Some(end) => { + let view = self.inner.mmap.read(end + 1).unwrap(); + // Remove the null terminator from the view + let view = &view[..view.len() - 1]; + return Ok(view); + } + // Update the mapping to catch up with the current read offset and try again. + None => { + // We scanned the entire content of the current mmap and + // there won't be anything else to mmap after that, so we + // reached the end. + if self.inner.curr_offset() + mem2file(self.inner.mmap.remaining()) + >= self.inner.max_offset() + { + // Consume all the remaining data to be consistent with + // other implementations. + assert!(self.inner.mmap.read(self.inner.mmap.remaining()).is_some()); + return Err(ErrorKind::UnexpectedEof.into()); + } else if i == 0 { + self.inner.mmap = unsafe { self.inner.remap(self.inner.curr_offset())? }; + } + } + } + } + + // We failed to find the pattern in the area covered by mmap, so try + // again with read() syscall. 
+ + let out = { + let mut file = self.inner.file.lock().unwrap(); + let file_offset = self.inner.curr_offset(); + file.seek(SeekFrom::Start(file_offset))?; + + let buf_size = 4096; + let mut out = ScratchVec::new_in(&self.scratch); + + loop { + let prev_len = out.len(); + out.resize(prev_len + buf_size, 0); + + let nr = file.read(&mut out[prev_len..])?; + out.truncate(prev_len + nr); + + if let Some(end) = find(&mut out[prev_len..]) { + out.truncate(prev_len + end); + break out; + } + } + }; + + // Remap for future reads after the syscall we are about to do. + let mmap_offset = self.inner.curr_offset() + mem2file(out.len()) + 1; + self.inner.mmap = unsafe { self.inner.remap(mmap_offset)? }; + + Ok(out.leak()) + } + + #[inline] + fn clone_and_seek(&self, offset: FileOffset, len: Option) -> io::Result { + unsafe { + Self::from_cell( + Arc::clone(&self.inner.file), + offset, + len, + self.inner.file_len, + ) + } + } + + #[inline] + fn try_clone(&self) -> io::Result { + unsafe { + Self::from_cell( + Arc::clone(&self.inner.file), + self.inner.curr_offset(), + None, + self.inner.file_len, + ) + } + } + + fn abs_seek(mut self, offset: FileOffset, len: Option) -> io::Result { + let file_len = self.inner.file_len; + let len = len.unwrap_or(file_len - offset); + + // Try to recycle the existing mapping if the new offset/size fits + // inside it. + match self.inner.mmap.abs_seek(offset, len) { + Some(mmap) => { + self.inner.mmap = mmap; + self.inner.offset = offset; + self.inner.len = len; + Ok(self) + } + None => unsafe { Self::from_cell(self.inner.file, offset, Some(len), file_len) }, + } + } +} + +// libtraceevent deals with it by keeping a loaded page for each CPU buffer. The page either comes from: +// * a mmap (not necessarily the whole file, it can deal with a small bit) +// * a simple seek + read + seek sequence to load a page in memory. +// +// This means it cannot consume a non-seek fd (tested with cat trace.dat | trace-cmd report /dev/stdin). 
+// +// This allows efficient access that avoids seeking all the time (only a few +// seeks to load a whole page), and there is one read buffer for each CPU buffer +// (as opposed to a single BufReader that would never preload the right piece of +// info since it would be shared for multiple offsets). +// + +pub struct BorrowingBufReader { + inner: BufReader>, + consume: MemSize, + len: FileSize, + max_len: FileSize, + scratch: ScratchAlloc, +} + +impl BorrowingBufReader +where + T: Read + Seek, +{ + pub fn new(mut reader: T, buf_size: Option) -> io::Result { + let len = file_len(&mut reader)?; + let offset = 0; + let reader = Arc::new(Mutex::new(reader)); + + Ok(Self::new_with(reader, buf_size, offset, len, len)) + } + + fn new_with( + reader: Arc>, + buf_size: Option, + offset: FileOffset, + len: FileSize, + max_len: FileSize, + ) -> Self { + let buf_size = buf_size.unwrap_or(4096); + + let reader = CursorReader::new(reader, offset, len); + let reader = BufReader::with_capacity(buf_size, reader); + + BorrowingBufReader { + inner: reader, + consume: 0, + len, + max_len, + scratch: ScratchAlloc::new(), + } + } + + #[inline] + fn consume(&mut self) { + self.inner.consume(self.consume); + self.consume = 0; + self.clear_buffer(); + } +} + +impl BorrowingReadCore for BorrowingBufReader +where + T: Read + Seek, +{ + #[inline] + fn clear_buffer(&mut self) { + self.scratch.reset(); + } + + #[inline] + fn read(&mut self, count: MemSize) -> io::Result<&[u8]> { + self.consume(); + + let buf = self.inner.fill_buf()?; + let len = buf.len(); + + if len == 0 && count > 0 { + Err(ErrorKind::UnexpectedEof.into()) + } else if count < len { + self.consume = count; + Ok(&self.inner.buffer()[..count]) + } else { + // Pre-filled buffer not large enough for that read, fallback on + // read() syscall + read(&mut self.inner, count, &self.scratch) + } + } + + #[inline] + fn read_null_terminated(&mut self) -> io::Result<&[u8]> { + self.consume(); + + let buf = self.inner.fill_buf()?; + let 
end = buf.iter().position(|x| *x == 0); + + match end { + Some(end) => { + if end == 0 { + let data = OwnedScratchBox::with_capacity_in(0, &self.scratch); + Ok(data.leak()) + } else { + self.consume = end + 1; + // For some reason, the borrow checker is not happy for us + // to use buf directly, so we fetch it again with + // self.inner.buffer() + Ok(&self.inner.buffer()[..end]) + } + } + None => { + // If we could not find the data in the pre-loaded buffer, just read + // as much as needed + let mut vec = ScratchVec::new_in(&self.scratch); + + loop { + let mut buf = [0]; + self.inner.read_exact(&mut buf)?; + let x = buf[0]; + if x == 0 { + break; + } else { + vec.push(x) + } + } + Ok(vec.leak()) + } + } + } + + fn try_clone(&self) -> io::Result { + let mut reader = self.inner.get_ref().clone(); + // We need to make sure the CursorReader inside the BufReader is + // pointing at the current offset, not the offset that we were advanced + // at to fill the buffer. + // We could use: + // self.inner.seek(SeekFrom::Current(0)); + // But this would purge the buffer of our BufReader, so instead we can + // just fixup the cloned CursorReader current offset. + reader.offset -= mem2file(self.inner.buffer().len() - self.consume); + + let inner = BufReader::with_capacity(self.inner.capacity(), reader); + Ok(BorrowingBufReader { + inner, + consume: 0, + len: self.len, + max_len: self.max_len, + scratch: ScratchAlloc::new(), + }) + } + + fn abs_seek(mut self, offset: FileOffset, len: Option) -> io::Result { + self.consume(); + + let capacity = self.inner.capacity(); + let len = len.unwrap_or(self.max_len - offset); + + // Ensure the underlying reader's cursor is set to the correct position, + // taking into account the unread part of the BufReader internal buffer. + // Otherwise, into_inner() will give a reader that is further in the + // stream compared to what we are currently looking at because BufReader + // pre-loaded some content. 
+ self.inner.stream_position()?; + let reader = self.inner.into_inner().inner; + + Ok(BorrowingBufReader::new_with( + reader, + Some(capacity), + offset, + len, + self.max_len, + )) + } +} + +struct CursorReader { + // This Mutex is necessary for types to be Send/Sync. The cost might seem high, but it's + // actually quite low as CursorReader is always used behind a BufReader. + inner: Arc>, + last_offset: FileOffset, + offset: FileOffset, +} + +impl Clone for CursorReader { + fn clone(&self) -> Self { + CursorReader { + inner: self.inner.clone(), + offset: self.offset, + last_offset: self.last_offset, + } + } +} + +impl CursorReader { + fn new(reader: Arc>, offset: FileOffset, len: FileSize) -> Self { + CursorReader { + inner: reader.clone(), + last_offset: offset + len, + offset, + } + } +} + +impl Read for CursorReader +where + T: Read + Seek, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut reader = self.inner.lock().unwrap(); + reader.seek(SeekFrom::Start(self.offset))?; + let mut count = reader.read(buf)?; + self.offset += mem2file(count); + + if self.offset > self.last_offset { + let rewind = file2mem(self.offset - self.last_offset); + count = if rewind > count { 0 } else { count - rewind }; + self.offset = self.last_offset; + } + Ok(count) + } +} + +impl Seek for CursorReader +where + T: Seek, +{ + fn seek(&mut self, pos: SeekFrom) -> io::Result { + let mut reader = self.inner.lock().unwrap(); + let pos = match pos { + SeekFrom::Current(x) => { + let start = if x > 0 { + self.offset + x.unsigned_abs() + } else { + self.offset - x.unsigned_abs() + }; + SeekFrom::Start(start) + } + pos => pos, + }; + self.offset = reader.seek(pos)?; + Ok(self.offset) + } +} + +fn read<'a, T>(reader: &mut T, count: MemSize, alloc: &'a ScratchAlloc) -> io::Result<&'a [u8]> +where + T: Read, +{ + let mut out = OwnedScratchBox::with_capacity_in(count, alloc); + reader.read_exact(out.deref_mut())?; + Ok(out.leak()) +} + +#[inline] +fn file_len(stream: &mut T) -> 
io::Result +where + T: Seek, +{ + let old_pos = stream.stream_position()?; + let len = stream.seek(SeekFrom::End(0))?; + stream.seek(SeekFrom::Start(old_pos))?; + Ok(len) +} + +pub trait DecodeBinary: Sized { + fn decode(buf: &[u8], endianness: Endianness) -> io::Result; +} + +macro_rules! impl_DecodeBinary { + ( $($ty:ty),* ) => { + $( + impl DecodeBinary for $ty { + #[inline] + fn decode(buf: &[u8], endianness: Endianness) -> io::Result { + match buf.try_into() { + Ok(buf) => Ok(match endianness { + Endianness::Little => Self::from_le_bytes(buf), + Endianness::Big => Self::from_be_bytes(buf), + }), + Err(_) => Err(ErrorKind::UnexpectedEof.into()) + } + } + } + )* + } +} + +impl_DecodeBinary!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize); diff --git a/tools/trace-parser/traceevent/src/iterator.rs b/tools/trace-parser/traceevent/src/iterator.rs new file mode 100644 index 0000000000..04e9f82a5e --- /dev/null +++ b/tools/trace-parser/traceevent/src/iterator.rs @@ -0,0 +1,182 @@ +use core::{ + cmp::{Ordering, Reverse}, + fmt::{Debug, Error, Formatter}, + iter::Iterator, + ops::{Deref, DerefMut}, +}; +use std::collections::BinaryHeap; + +pub struct SavedIterator +where + I: Iterator, +{ + iter: I, + item: Option, +} + +impl Debug for SavedIterator +where + I: std::iter::Iterator, + ::Item: Debug, +{ + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + f.debug_struct("SavedIterator") + .field("item", &self.item) + .finish_non_exhaustive() + } +} + +impl PartialEq for SavedIterator +where + I: Iterator, + ::Item: PartialEq, +{ + #[inline] + fn eq(&self, other: &Self) -> bool { + self.item == other.item + } +} + +impl Eq for SavedIterator +where + I: Iterator, + ::Item: PartialEq, +{ +} + +impl PartialOrd for SavedIterator +where + I: Iterator, + ::Item: PartialOrd, +{ + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&self.item, &other.item) + } +} + +impl Ord for SavedIterator +where + I: 
Iterator, + ::Item: Ord, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&self.item, &other.item) + } +} + +pub struct MergedIterator +where + I: IntoIterator, +{ + curr: Reverse>, + heap: BinaryHeap>>, +} + +impl MergedIterator +where + I: IntoIterator, + SavedIterator<::IntoIter>: Ord, +{ + pub fn new>(iterators: II) -> Option { + let mut iterators = iterators.into_iter().map(|i| i.into_iter()); + + let first = iterators.next()?; + let curr = Reverse(SavedIterator { + item: None, + iter: first, + }); + + let mut heap = BinaryHeap::with_capacity(iterators.size_hint().0); + heap.extend(iterators.filter_map(|mut iter| match iter.next() { + None => None, + item => Some(Reverse(SavedIterator { item, iter })), + })); + + Some(MergedIterator { curr, heap }) + } +} + +impl Iterator for MergedIterator +where + I: IntoIterator, + SavedIterator<::IntoIter>: Ord, +{ + type Item = I::Item; + + #[inline] + fn next(&mut self) -> Option { + match self.curr.0.iter.next() { + // We exhausted the last selected iterator, so we just drop it and + // select the highest heap item. + None => { + self.curr = self.heap.pop()?; + } + new => { + self.curr.0.item = new; + // If the highest in the heap is higher than the last selected + // iterator, we select a new iterator and put back the other one in + // the heap. + if let Some(mut highest) = self.heap.peek_mut() { + if highest.deref() > &self.curr { + core::mem::swap(highest.deref_mut(), &mut self.curr); + } + } + } + } + + // SAFETY: We guarantee that we will not call next() on the iterator + // that produced this value until the next iteration. 
+ core::mem::take(&mut self.curr.0.item) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test(iterators: Vec>, expected: Vec) { + // eprintln!("============================="); + // eprintln!("{:?} => {:?}", iterators, expected); + let merged = MergedIterator::new(iterators).expect("No iterator to merge"); + assert_eq!(merged.collect::>(), expected); + } + + #[test] + fn iterator_test() { + test(vec![vec![1, 3, 5], vec![2, 4]], vec![1, 2, 3, 4, 5]); + test(vec![vec![1, 3], vec![2, 4, 6]], vec![1, 2, 3, 4, 6]); + + test(vec![vec![1, 3, 5], vec![2, 4, 6]], vec![1, 2, 3, 4, 5, 6]); + test(vec![vec![2, 4, 6], vec![1, 3, 5]], vec![1, 2, 3, 4, 5, 6]); + test(vec![vec![], vec![2, 4, 6]], vec![2, 4, 6]); + test(vec![vec![1, 3, 5], vec![]], vec![1, 3, 5]); + + test( + vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]], + vec![1, 2, 3, 4, 5, 6, 7, 8, 9], + ); + test( + vec![vec![7, 8, 9], vec![4, 5, 6], vec![1, 2, 3]], + vec![1, 2, 3, 4, 5, 6, 7, 8, 9], + ); + test( + vec![vec![], vec![4, 5, 6], vec![1, 2, 3]], + vec![1, 2, 3, 4, 5, 6], + ); + test( + vec![vec![4, 5, 6], vec![], vec![1, 2, 3]], + vec![1, 2, 3, 4, 5, 6], + ); + test( + vec![vec![4, 6], vec![5], vec![1, 2, 3]], + vec![1, 2, 3, 4, 5, 6], + ); + test(vec![vec![4, 6], vec![], vec![1, 2, 3]], vec![1, 2, 3, 4, 6]); + + test(vec![vec![4, 6], vec![1, 2, 3], vec![]], vec![1, 2, 3, 4, 6]); + test(vec![vec![], vec![4, 6], vec![1, 2, 3]], vec![1, 2, 3, 4, 6]); + } +} diff --git a/tools/trace-parser/traceevent/src/lib.rs b/tools/trace-parser/traceevent/src/lib.rs new file mode 100644 index 0000000000..71644d39f9 --- /dev/null +++ b/tools/trace-parser/traceevent/src/lib.rs @@ -0,0 +1,18 @@ +mod closure; +mod compress; +mod grammar; +mod iterator; +mod memo; +mod nested_pointer; +mod parser; +mod error; + +pub mod array; +pub mod buffer; +pub mod cinterp; +pub mod cparser; +pub mod header; +pub mod io; +pub mod print; +pub mod scratch; +pub mod str; diff --git a/tools/trace-parser/traceevent/src/memo.rs 
b/tools/trace-parser/traceevent/src/memo.rs new file mode 100644 index 0000000000..018215d1a3 --- /dev/null +++ b/tools/trace-parser/traceevent/src/memo.rs @@ -0,0 +1,78 @@ +use core::{ + fmt, + ops::{Deref, DerefMut}, +}; + +use once_cell::sync::OnceCell; + +#[derive(Clone)] +pub(crate) struct Memo { + val: OnceCell, + pub seed: Seed, + pub f: F, +} + +impl Memo +where + F: Fn(&Seed) -> T + Send + Sync, +{ + #[inline] + pub fn new(seed: Seed, f: F) -> Self { + Memo { + val: OnceCell::new(), + seed, + f, + } + } + + // #[inline] + // pub fn memoized(&self) -> Option<&T> { + // self.val.get() + // } + + // #[inline] + // pub fn memoized_mut(&mut self) -> Option<&mut T> { + // self.val.get_mut() + // } + + #[inline] + pub fn into_owned(mut self) -> T { + let _ = self.eval(); + self.val.take().unwrap() + } + + #[inline] + fn eval(&self) -> &T { + self.val.get_or_init(|| (self.f)(&self.seed)) + } +} + +impl Deref for Memo +where + F: Fn(&Seed) -> T + Send + Sync, +{ + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + self.eval() + } +} + +impl DerefMut for Memo +where + F: Fn(&Seed) -> T + Send + Sync, +{ + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + let _ = self.eval(); + self.val.get_mut().unwrap() + } +} + +impl fmt::Debug for Memo { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("Memo").finish_non_exhaustive() + } +} diff --git a/tools/trace-parser/traceevent/src/nested_pointer.rs b/tools/trace-parser/traceevent/src/nested_pointer.rs new file mode 100644 index 0000000000..6cb23fe1d4 --- /dev/null +++ b/tools/trace-parser/traceevent/src/nested_pointer.rs @@ -0,0 +1,44 @@ +use core::{fmt::Debug, ops::Deref}; + +#[derive(Clone, Debug)] +pub(crate) struct NestedPointer { + outer: Outer, +} +impl NestedPointer { + #[inline] + pub fn new(outer: Outer) -> Self { + NestedPointer { outer } + } + + // #[inline] + // pub fn into_outer(self) -> Outer { + // self.outer + // } +} + +impl 
Deref for NestedPointer +where + Outer: Deref, + Outer::Target: Deref, +{ + type Target = ::Target; + + fn deref(&self) -> &Self::Target { + &self.outer + } +} + +impl AsRef for NestedPointer +where + // We can't use AsRef here because otherwise the compiler might be left with + // multiple choices of N. With Deref, only one choice is possible since + // Target is an associated type. + Outer: Deref, + Inner: Deref + ?Sized, + T: ?Sized, +{ + #[inline] + fn as_ref(&self) -> &T { + self.deref() + } +} diff --git a/tools/trace-parser/traceevent/src/parser.rs b/tools/trace-parser/traceevent/src/parser.rs new file mode 100644 index 0000000000..ad1a6e80a4 --- /dev/null +++ b/tools/trace-parser/traceevent/src/parser.rs @@ -0,0 +1,499 @@ +use core::{ + fmt::{Debug, Display, Formatter}, + ops::Range, +}; +use std::string::String as StdString; + +use nom::{ + bytes::complete::is_a, + character::complete::{char, multispace0}, + combinator::all_consuming, + error::{ContextError, ErrorKind, FromExternalError, ParseError}, + sequence::delimited, + Finish as _, Parser, +}; + +pub trait FromParseError: Sized { + fn from_parse_error(input: I, err: &E) -> Self; +} + +impl FromParseError for () { + fn from_parse_error(_input: I, _err: &E) -> Self {} +} + +#[derive(Clone, PartialEq)] +pub struct VerboseParseError { + input: String, + errors: Vec<(Range, nom::error::VerboseErrorKind)>, +} + +impl VerboseParseError { + pub fn from_input>(input: I) -> Self { + let input = input.as_ref(); + let input = StdString::from_utf8_lossy(input).into_owned(); + VerboseParseError { + input, + errors: vec![], + } + } + pub fn new, I2: AsRef<[u8]>>( + input: I, + err: &nom::error::VerboseError, + ) -> Self { + match core::str::from_utf8(input.as_ref()) { + Err(err) => VerboseParseError { + input: format!(""), + errors: vec![], + }, + Ok(input) => { + let errors = err + .errors + .iter() + .map(|(s, k)| { + let s = s.as_ref(); + let offset = s.as_ptr() as usize - input.as_ptr() as usize; + let size = 
s.len(); + let range = offset..(offset + size); + (range, k.clone()) + }) + .collect(); + + VerboseParseError { + input: input.into(), + errors, + } + } + } + } +} + +impl Eq for VerboseParseError {} + +impl Debug for VerboseParseError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + write!(f, "VerboseParseError {{{self}}}",) + } +} + +impl Display for VerboseParseError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + let input = self.input.as_str(); + let mut seen_context = false; + let inner = nom::error::VerboseError { + errors: self + .errors + .iter() + // Preserve the leaf-most levels that don't have a + // context, but after the first context is + // encountered, display only levels with a context. + // This makes the path much easier to follow if all + // relevant levels are annotated correctly. + .filter(|(_, kind)| match kind { + nom::error::VerboseErrorKind::Context(..) => { + seen_context = true; + true + } + _ => !seen_context, + }) + .map(|(range, k)| (&input[range.clone()], k.clone())) + .collect(), + }; + write!( + f, + "Error while parsing:\n{}\n{}\n", + input, + &nom::error::convert_error(input, inner) + )?; + Ok(()) + } +} + +/// Tie together a nom error and some user-defined data. +#[derive(Debug)] +pub struct NomError { + /// User-defined data. 
+ pub data: Option, + /// nom error, such as [nom::error::Error] + pub inner: E, +} + +impl NomError { + #[inline] + fn from_inner(inner: E) -> Self + where + T: FromParseError, + { + NomError { data: None, inner } + } + + pub fn into_external(self, input: I, mut convert: F) -> nom::IResult + where + F: FnMut(T) -> T2, + E: ParseError, + E2: ParseError + FromExternalError, + { + match self.data { + Some(data) => error(input, convert(data)), + None => Err(nom::Err::Error(E2::from_error_kind( + input, + nom::error::ErrorKind::Fail, + ))), + } + } +} + +impl ParseError for NomError +where + I: Clone, + E: ParseError, + T: FromParseError, +{ + #[inline] + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + NomError::from_inner(E::from_error_kind(input, kind)) + } + + #[inline] + fn from_char(input: I, c: char) -> Self { + NomError::from_inner(E::from_char(input, c)) + } + + #[inline] + fn append(input: I, kind: ErrorKind, other: Self) -> Self { + NomError { + inner: E::append(input.clone(), kind, other.inner), + data: other.data, + } + } + + #[inline] + fn or(self, other: Self) -> Self { + NomError { + data: other.data, + inner: self.inner.or(other.inner), + } + } +} + +impl FromExternalError for NomError +where + E: ParseError, +{ + #[inline] + fn from_external_error(input: I, kind: ErrorKind, e: T) -> Self { + NomError { + data: Some(e), + inner: E::from_error_kind(input, kind), + } + } +} + +impl ContextError for NomError +where + E: ContextError, +{ + #[inline] + fn add_context(input: I, ctx: &'static str, other: Self) -> Self { + NomError { + data: other.data, + inner: E::add_context(input, ctx, other.inner), + } + } +} + +////////////// +// Conversions +////////////// + +pub fn to_str(s: &[u8]) -> StdString { + StdString::from_utf8_lossy(s).to_string() +} + +////////////////////// +// Parsers +////////////////////// + +pub fn hex_u64(input: I) -> nom::IResult +where + E: ParseError, + I: Clone, + I: nom::AsBytes + nom::InputIter + nom::InputTakeAtPosition, 
+{ + is_a(&b"0123456789abcdefABCDEF"[..]) + .map(|x: I| { + x.as_bytes() + .iter() + .rev() + .enumerate() + .map(|(k, v)| -> u64 { + let v: char = (*v).into(); + let v: u64 = v.to_digit(16).unwrap_or(0).into(); + v << (k * 4) + }) + .sum() + }) + .parse(input) +} + +////////////////////// +// Generic combinators +////////////////////// + +pub trait NomParserExt: nom::Parser> { + #[inline] + fn parse_finish(&mut self, input: I) -> Result + where + I: nom::InputLength + Clone + Debug, + NE: Debug + ParseError, + E: Debug + FromParseError, + { + let mut parser = all_consuming(|input| self.parse(input)); + match parser.parse(input.clone()).finish() { + Err(err) => match err.data { + None => Err(E::from_parse_error(input, &err.inner)), + Some(err) => Err(err), + }, + Ok((_, x)) => Ok(x), + } + } +} + +impl NomParserExt for P where P: nom::Parser> {} + +#[allow(unused)] +pub fn print(name: &'static str, mut inner: P) -> impl nom::Parser +where + E: ParseError, + P: nom::Parser, + I: core::convert::AsRef<[u8]> + Clone, + O: Debug, +{ + move |input: I| { + let (i, x) = inner.parse(input.clone())?; + println!( + "{name} input={:?} out={x:?} new_input={:?}", + to_str(input.as_ref()), + to_str(i.as_ref()) + ); + Ok((i, x)) + } +} + +pub fn lexeme(inner: P) -> impl nom::Parser +where + E: ParseError, + P: nom::Parser, + I: Clone + nom::InputLength + nom::InputIter + nom::InputTake + nom::InputTakeAtPosition, + ::Item: Clone + nom::AsChar, + ::Item: Clone + nom::AsChar, +{ + delimited(multispace0, inner, multispace0) +} + +pub fn parenthesized(parser: P) -> impl nom::Parser +where + P: nom::Parser, + E: ParseError, + I: nom::Slice> + + nom::InputIter + + Clone + + nom::InputLength + + nom::InputTake + + nom::InputTakeAtPosition, + ::Item: Clone + nom::AsChar, + ::Item: Clone + nom::AsChar, +{ + delimited(lexeme(char('(')), parser, lexeme(char(')'))) +} + +pub fn map_res_cut, E2, F, G>( + mut parser: F, + mut f: G, +) -> impl nom::Parser +where + F: Parser, + G: FnMut(O1) -> 
Result, +{ + move |input: I| { + let i = input.clone(); + let (input, x) = parser.parse(input)?; + match f(x) { + Ok(x) => Ok((input, x)), + Err(err) => Err(nom::Err::Failure(E::from_external_error( + i, + ErrorKind::MapRes, + err, + ))), + } + } +} + +// // Not available in nom 7 but maybe will be there in nom 8: +// // https://github.com/rust-bakery/nom/issues/1422 +// pub fn map_err(mut parser: P, f: F) -> impl nom::Parser +// where +// P: nom::Parser, +// E: ParseError, +// E2: ParseError + FromExternalError, +// F: Fn(E) -> MappedE, +// I: Clone, +// { +// move |input: I| match parser.parse(input.clone()) { +// Err(nom::Err::Error(e)) => Err(nom::Err::Error(E2::from_external_error( +// input, +// ErrorKind::Fail, +// f(e), +// ))), +// Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(E2::from_external_error( +// input, +// ErrorKind::Fail, +// f(e), +// ))), +// Err(nom::Err::Incomplete(x)) => Err(nom::Err::Incomplete(x)), +// Ok(x) => Ok(x), +// } +// } + +pub fn success_with(mut f: F) -> impl FnMut(I) -> nom::IResult +where + F: FnMut() -> O, + E: ParseError, +{ + move |input: I| Ok((input, f())) +} + +#[inline] +pub fn error(input: I, err: E) -> nom::IResult +where + E2: FromExternalError, +{ + Err(nom::Err::Error(E2::from_external_error( + input, + ErrorKind::Fail, + err, + ))) +} + +#[inline] +pub fn failure(input: I, err: E) -> nom::IResult +where + E2: FromExternalError, +{ + Err(nom::Err::Failure(E2::from_external_error( + input, + ErrorKind::Fail, + err, + ))) +} + +// pub fn null_terminated_str_parser<'a, E>() -> impl nom::Parser<&'a [u8], &'a [u8], E> +// where +// E: ParseError<&'a [u8]>, +// { +// terminated( +// take_until(&[0][..]), +// // Consume the null terminator +// tag([0]), +// ) +// } + +pub trait DisplayErr { + fn display_err(&self) -> StdString; +} +pub trait DisplayErrViaDisplay {} + +impl DisplayErrViaDisplay for crate::cparser::CParseError {} +impl DisplayErrViaDisplay for crate::header::HeaderError {} +impl DisplayErrViaDisplay 
for crate::print::PrintFmtError {} + +impl DisplayErr for T +where + T: DisplayErrViaDisplay + Display, +{ + fn display_err(&self) -> StdString { + format!("{}", self) + } +} + +impl DisplayErr for () { + fn display_err(&self) -> StdString { + "".into() + } +} + +#[cfg(test)] +pub(crate) mod tests { + use nom::{error::VerboseError, Finish as _}; + + use super::*; + + // Work-around this issue: + // https://github.com/rust-bakery/nom/issues/1619 + // This function _must_ preserve the address of buf, as + // nom::error::convert_error() relies on VerboseError input stack to be + // pointer into the overall input. Otherwise, pointer arithmetic will + // make no sense and it will either display non-sensical substrings or + // panic. + pub fn zero_copy_to_str(buf: &[u8]) -> &str { + std::str::from_utf8(buf).unwrap() + } + + pub fn run_parser(input: I, parser: P) -> O + where + O: Debug + PartialEq, + P: Parser>>, + I: nom::AsBytes + nom::InputLength + Clone, + T: DisplayErr + FromParseError>, + { + let mut parser = all_consuming(parser); + let parsed = parser.parse(input.clone()).finish(); + let input = zero_copy_to_str(input.as_bytes()); + match parsed { + Ok((_, parsed)) => parsed, + Err(err) => { + // Convert input from &[u8] to &str so convert_error() can + // display it. + let mut seen_context = false; + let inner = VerboseError { + errors: err + .inner + .errors + .iter() + // Preserve the leaf-most levels that don't have a + // context, but after the first context is + // encountered, display only levels with a context. + // This makes the path much easier to follow if all + // relevant levels are annotated correctly. + .filter(|(_, kind)| match kind { + nom::error::VerboseErrorKind::Context(..) 
=> { + seen_context = true; + true + } + _ => !seen_context, + }) + .map(|(s, err)| (zero_copy_to_str(s.as_bytes()), err.clone())) + .collect(), + }; + let loc = nom::error::convert_error(input, inner); + let err_data = match err.data { + Some(data) => data.display_err(), + None => "".into(), + }; + panic!("Could not parse {input:?}: {err_data} :\n{loc}") + } + } + } + + pub fn test_parser(expected: O, input: I, parser: P) + where + O: Debug + PartialEq, + T: DisplayErr + FromParseError>, + P: Parser>>, + I: nom::AsBytes + nom::InputLength + Clone, + { + let parsed = run_parser(input.clone(), parser); + + let input = zero_copy_to_str(input.as_bytes()); + assert_eq!(parsed, expected, "while parsing: {input:?}"); + } +} diff --git a/tools/trace-parser/traceevent/src/print.rs b/tools/trace-parser/traceevent/src/print.rs new file mode 100644 index 0000000000..fceee7bfb5 --- /dev/null +++ b/tools/trace-parser/traceevent/src/print.rs @@ -0,0 +1,1505 @@ +use core::{ + cmp::Ordering, + fmt, + fmt::{Debug, Write as _}, + str::{from_utf8, Utf8Error}, +}; +use std::{error::Error, io}; + +use bitflags::bitflags; +use itertools::Itertools as _; +use nom::{ + branch::alt, + bytes::complete::{is_not, tag}, + character::complete::{char, u64 as txt_u64}, + combinator::{cut, opt, success}, + error::{context, ContextError, FromExternalError, ParseError}, + multi::{many0, many1}, + sequence::{preceded, separated_pair, tuple}, + Parser, +}; +use once_cell::sync::OnceCell; +use smartstring::alias::String; + +use crate::{ + buffer::{BufferError, VBinDecoder}, + cinterp::{CompileError, EvalEnv, EvalError, InterpError, SockAddr, SockAddrKind, Value}, + cparser::{Expr, CParseError}, + header::{Abi, Endianness, Header, LongSize, Signedness}, + parser::{map_res_cut, FromParseError, NomParserExt as _, VerboseParseError}, + str::{InnerStr, Str}, + error::convert_err_impl, +}; + +#[derive(Debug, Clone)] +pub struct PrintFmtStr { + pub atoms: Vec, + pub(crate) vbin_decoders: OnceCell>, +} + +impl 
PartialEq for PrintFmtStr { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.atoms == other.atoms + } +} + +impl Eq for PrintFmtStr {} + +impl PartialOrd for PrintFmtStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PrintFmtStr { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.atoms.cmp(&other.atoms) + } +} + +pub struct StringWriter { + inner: W, +} + +impl StringWriter { + #[inline] + pub fn new(inner: W) -> Self { + StringWriter { inner } + } + + #[inline] + pub fn into_inner(self) -> W { + self.inner + } +} + +impl io::Write for StringWriter { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + self.inner.write(buf) + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +impl fmt::Write for StringWriter { + #[inline] + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + io::Write::write(self, s.as_bytes()) + .map(|_| ()) + .map_err(|_| fmt::Error) + } +} + +struct TrackedWriter { + inner: W, + count: usize, +} + +impl TrackedWriter { + #[inline] + fn new(inner: W) -> Self { + TrackedWriter { count: 0, inner } + } +} + +impl fmt::Write for TrackedWriter { + #[inline] + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + self.inner.write_str(s)?; + self.count += s.len(); + Ok(()) + } +} + +struct SinkWriter; + +impl fmt::Write for SinkWriter { + #[inline] + fn write_str(&mut self, _: &str) -> Result<(), fmt::Error> { + Ok(()) + } +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum PrintError { + #[error("Dynamic width in printf format is missing")] + PrintFmtMissingWidth, + + #[error("Dynamic precision in printf format is missing")] + PrintFmtMissingPrecision, + + #[error("Missing value to interpolate in the format string")] + MissingValue, + + #[error("Address is not an integer")] + NonNumericAddr, + + #[error("Value cannot be formatted as a buffer: {}", match .0 { + 
Some(s) => s.to_string(), + None => "".into() + })] + NotABuffer(Option>), + + #[error("Value cannot be formatted as a string: {}", match .0 { + Some(s) => s.to_string(), + None => "".into() + })] + NotAString(Option>), + + #[error("Value cannot be formatted as an integer: {}", match .0 { + Some(s) => s.to_string(), + None => "".into() + })] + NotAnInteger(Option>), + + #[error("Value cannot be formatted as a sockaddr: {}", match .0 { + Some(s) => s.to_string(), + None => "".into() + })] + NotASockAddr(Option>), + + #[error("SockAddr is not a valid IP address")] + NotAnIpAddr, + + #[error("Specifier not implemented: {0:?}")] + SpecifierNotHandled(PrintSpecifier), + + #[error("Error while formatting string: {0}")] + FmtError(Box), + + #[error("Error while decoding buffer: {0}")] + BufferError(Box), + + #[error("Error while interpreting expression: {0}")] + InterpError(Box), +} + +convert_err_impl!(BufferError, BufferError, PrintError); +convert_err_impl!(EvalError, InterpError, PrintError); +convert_err_impl!(CompileError, InterpError, PrintError); +convert_err_impl!(fmt::Error, FmtError, PrintError); + +pub struct PrintArg<'a> { + pub width: Option, + pub precision: Option, + pub value: Value<'a>, +} + +impl PrintFmtStr { + pub fn interpolate_values<'v, 'ee, E, W, I, EE>( + &self, + header: &'v Header, + env: &'ee EE, + out: &mut W, + values: I, + ) -> Result<(), PrintError> + where + EE: EvalEnv<'ee> + ?Sized, + E: Error, + W: fmt::Write + ?Sized, + I: IntoIterator, E>>, + PrintError: From, + { + let mut values = values.into_iter(); + + macro_rules! 
get_int { + ($err:expr) => { + match values.next() { + Some(Err(err)) => Err(err.into()), + Some(Ok(Value::U64Scalar(x))) => Ok(x.try_into().unwrap()), + Some(Ok(Value::I64Scalar(x))) if x >= 0 => { + Ok(x.unsigned_abs().try_into().unwrap()) + } + _ => Err($err), + } + }; + } + + let print_values = self.atoms.iter().filter_map(|atom| match atom { + PrintAtom::Fixed(_) => None, + PrintAtom::Variable { + width, precision, .. + } => Some(|| -> Result, PrintError> { + let width = match width { + PrintWidth::Dynamic => Some(get_int!(PrintError::PrintFmtMissingWidth)?), + _ => None, + }; + let precision = match precision { + PrintPrecision::Dynamic => { + Some(get_int!(PrintError::PrintFmtMissingPrecision)?) + } + _ => None, + }; + let value = values.next().ok_or(PrintError::MissingValue)??; + Ok(PrintArg { + value, + width, + precision, + }) + }()), + }); + self.interpolate_into::(header, env, out, print_values) + } + + pub fn interpolate_vbin<'v, 'ee, W, EE>( + &self, + header: &'v Header, + env: &'ee EE, + out: &mut W, + buf: &'v [u32], + ) -> Result<(), PrintError> + where + W: fmt::Write + ?Sized, + EE: EvalEnv<'ee> + ?Sized, + { + self.interpolate_into( + header, + env, + out, + self.vbin_fields(header, env.scratch(), buf), + ) + } + + fn interpolate_into<'v, 'ee, E, W, I, EE>( + &self, + header: &'v Header, + env: &'ee EE, + out: &mut W, + values: I, + ) -> Result<(), PrintError> + where + E: Error, + EE: EvalEnv<'ee> + ?Sized, + W: fmt::Write + ?Sized, + I: IntoIterator, E>>, + PrintError: From, + { + let out = &mut TrackedWriter::new(out); + let mut values = values.into_iter(); + + let mut print_variable = |out: &mut TrackedWriter<&mut W>, + print_spec: &PrintSpecifier, + flags: &PrintFlags, + width: &PrintWidth, + precision: &PrintPrecision| + -> Result<_, PrintError> { + let item = values.next().ok_or(PrintError::MissingValue)??; + let vanilla = print_spec == &PrintSpecifier::Dec + && width == &PrintWidth::Unmodified + && precision == 
&PrintPrecision::Unmodified + && flags.is_empty(); + + let width = match width { + PrintWidth::Fixed(x) => Ok(*x), + PrintWidth::Unmodified => Ok(0), + PrintWidth::Dynamic => match item.width { + None => Err(PrintError::PrintFmtMissingWidth), + Some(x) => Ok(x), + }, + }?; + + let precision = match precision { + PrintPrecision::Fixed(x) => Ok(Some(*x)), + PrintPrecision::Unmodified => Ok(None), + PrintPrecision::Dynamic => match item.precision { + None => Err(PrintError::PrintFmtMissingPrecision), + Some(x) => Ok(Some(x)), + }, + }?; + + #[derive(Debug)] + enum Justification { + Left, + Right, + } + + #[allow(clippy::enum_variant_names)] + #[derive(Debug)] + enum Sign { + OnlyNeg, + PosAndNeg, + BlankAndNeg, + } + + #[derive(Debug)] + enum BasePrefix { + None, + Oct, + LowerHex, + UpperHex, + } + + #[derive(Debug)] + enum Padding { + Zero, + Space, + } + + let justification = if flags.contains(PrintFlags::LeftJustify) { + Justification::Left + } else { + Justification::Right + }; + + let sign = if flags.contains(PrintFlags::PositiveSign) { + Sign::PosAndNeg + } else if flags.contains(PrintFlags::SignPlaceholder) { + Sign::BlankAndNeg + } else { + Sign::OnlyNeg + }; + + let base_prefix = if flags.contains(PrintFlags::BasePrefix) { + match print_spec { + PrintSpecifier::UpperHex => BasePrefix::UpperHex, + PrintSpecifier::Hex => BasePrefix::LowerHex, + PrintSpecifier::Oct => BasePrefix::Oct, + _ => BasePrefix::None, + } + } else { + BasePrefix::None + }; + + let padding = if flags.contains(PrintFlags::ZeroPad) { + Padding::Zero + } else { + Padding::Space + }; + + // eprintln!("VALUE {val:?} just={justification:?} sign={sign:?} base={base_prefix:?} pad={padding:?} width={width:?} precision={precision:?}"); + + macro_rules! 
print_left { + ($x:expr, $print_spec:expr, $width:expr, $precision:expr, $discount_prefix:expr, $out:expr) => {{ + let x = $x; + let print_spec = &$print_spec; + let width = $width; + let precision = $precision; + let discount_prefix: bool = $discount_prefix; + let out: &mut _ = $out; + + let start = out.count; + + match print_spec { + PrintSpecifier::Dec => + { + #[allow(unused_comparisons)] + if x < 0 { + write!(out, "-") + } else { + match sign { + Sign::OnlyNeg => Ok(()), + Sign::PosAndNeg => write!(out, "+"), + Sign::BlankAndNeg => write!(out, " "), + } + } + } + _ => match base_prefix { + BasePrefix::LowerHex => write!(out, "0x"), + BasePrefix::UpperHex => write!(out, "0X"), + BasePrefix::Oct => write!(out, "0"), + BasePrefix::None => Ok(()), + }, + }?; + + let precision = precision.unwrap_or(0); + let so_far = out.count - start; + let precision = if discount_prefix && so_far <= precision { + precision - so_far + } else { + precision + }; + + #[allow(unused_comparisons)] + let abs = if x < 0 { 0 - x } else { x }; + match print_spec { + PrintSpecifier::Hex => { + write!(out, "{:0>precision$x}", abs) + } + PrintSpecifier::UpperHex => { + write!(out, "{:0>precision$X}", abs) + } + PrintSpecifier::Oct => { + write!(out, "{:0>precision$o}", abs) + } + _ => write!(out, "{:0>precision$}", abs), + }?; + + let so_far = out.count - start; + if width > so_far { + write!(out, "{: >pad$}", "", pad = width - so_far)?; + }; + Ok(()) + }}; + } + + macro_rules! print_integral { + ($x:expr, $print_spec:expr) => {{ + let x = $x; + let print_spec = $print_spec; + match justification { + // If width == 0, justification is irrelevant so + // we can use the simpler path. 
+ Justification::Right if width > 0 => { + let (discount_prefix, precision) = match precision { + Some(precision) => (false, Some(precision)), + None => ( + // If we use the width as precision, the + // precision will have to be reduced by + // the amount of prefix otherwise the + // overall width will not be respected. + // + // We still use the width as precision + // as this allows interposing extra + // zeros between the prefix and the + // value. + true, + match padding { + Padding::Zero => Some(width), + Padding::Space => Some(0), + }, + ), + }; + + let sink = &mut TrackedWriter::new(SinkWriter); + print_left!(x, print_spec, 0, precision, discount_prefix, sink)?; + if width > sink.count { + write!(out, "{: print_left!(x, print_spec, width, precision, false, out), + } + }}; + } + + let mut print_str = |s: &str| { + let s = match precision { + Some(x) => s.get(..x).unwrap_or(s), + None => s, + }; + // Remove trainling "\n". They appear in "print" event when userspace used + // "echo" to write to the trace_marker file. + let s = s.trim_end_matches('\n'); + match justification { + Justification::Left => write!(out, "{s: write!(out, "{s: >width$}"), + } + }; + + let print_symbol = + |out: &mut TrackedWriter<&mut W>, addr, show_offset| -> Result<_, PrintError> { + let addr = match addr { + Value::U64Scalar(x) => Ok(x), + Value::I64Scalar(x) => Ok(x as u64), + addr => match addr.to_str() { + Some(s) => return Ok(write!(out, "{s}")?), + None => Err(PrintError::NonNumericAddr), + }, + }?; + Ok(match header.sym_at(addr) { + Some((offset, size, name)) => { + if show_offset { + match size { + Some(size) => write!(out, "{name}+{offset:#x}/{size:#x}"), + None => write!(out, "{name}+{offset:#x}"), + } + } else { + write!(out, "{name}") + } + } + None => write!(out, "{addr:#x}"), + }?) + }; + + // T this is a bit crude, as we display addresses always + // the same way, without taking into account user hints. 
+ let print_sockaddr = + |out: &mut TrackedWriter<&mut W>, s: &SockAddr| match s.to_socketaddr() { + Ok(addr) => Ok(write!(out, "{addr}")?), + _ => match s.to_ipaddr() { + Ok(addr) => Ok(write!(out, "{addr}")?), + _ => Err(PrintError::NotAnIpAddr), + }, + }; + + macro_rules! print_hex_buffer { + ($out:expr, $arr:expr, $sep:expr) => {{ + let arr = $arr; + let sep = $sep; + + let n = if width == 0 { usize::MAX } else { width }; + for (i, x) in arr.into_iter().take(n).enumerate() { + if i != 0 { + match sep { + HexBufferSeparator::C => $out.write_char(':'), + HexBufferSeparator::D => $out.write_char('-'), + HexBufferSeparator::N => Ok(()), + }?; + } + write!($out, "{x:02x}")?; + } + Ok(()) + }}; + } + + macro_rules! handle_sockaddr { + ($out:expr, $kind:expr, $val:expr, $endianness:expr) => {{ + let val: &Value = $val; + let endianness: Endianness = $endianness; + + let temp; + let sockaddr: &SockAddr = match val { + // This is not pretty but should work well + // enough: this function can be called on + // multiple paths: + // + // * When formatting an event after having + // decoded its fields. In that case, we get a + // U8Array that contains the raw sockaddr, and + // possibly in the future we will get a + // Value::SockAddr + // * When formatting a bprint buffer: we will + // get a Value::Str, since the sockaddr has + // already been rendered to a string by the + // kernel. + // + // Normally, U8Array and Str can be used + // interchangeably, but in this case we need to + // disambiguate between the 2 cases. + Value::Str(s) => return Ok(print_str(&s)?), + Value::SockAddr(addr) => addr, + Value::U8Array(arr) => { + temp = + SockAddr::from_bytes(&arr, endianness, $kind).map_err(|err| { + let err: PrintError = err.into(); + err + })?; + &temp + } + val => Err(PrintError::NotASockAddr(val.clone().into_static().ok()))?, + }; + Ok(print_sockaddr($out, sockaddr)?) 
+ }}; + } + + let val = item.value; + Ok(match print_spec { + PrintSpecifier::Hex + | PrintSpecifier::UpperHex + | PrintSpecifier::Dec + | PrintSpecifier::Oct => match val { + // Fast path if vanilla. This should accomodate the vast + // majority of fields. + Value::U64Scalar(x) if vanilla => write!(out, "{x}"), + Value::I64Scalar(x) if vanilla => write!(out, "{x}"), + + Value::U64Scalar(x) => print_integral!(x, print_spec), + Value::I64Scalar(x) => print_integral!(x, print_spec), + + val => Err(PrintError::NotAnInteger(val.into_static().ok()))?, + }, + PrintSpecifier::Str => match val { + Value::Str(Str { + inner: InnerStr::Procedural(memo), + }) => { + memo.seed.write(out); + Ok(()) + } + _ => { + let val = val.deref_ptr(env)?; + match val.to_str() { + Some(s) => print_str(s), + None => Err(PrintError::NotAString(val.into_static().ok()))?, + } + }, + }, + PrintSpecifier::Symbol => Ok(print_symbol(out, val, false)?), + PrintSpecifier::SymbolWithOffset => Ok(print_symbol(out, val, true)?), + PrintSpecifier::HexBuffer(sep) => { + let val = val.deref_ptr(env)?; + let res = match val.to_bytes() { + Some(arr) => print_hex_buffer!(out, arr, *sep), + _ => Err(PrintError::NotABuffer(val.clone().into_static().ok()))?, + }; + res + }, + + PrintSpecifier::IpGeneric(endianness) => { + handle_sockaddr!(out, SockAddrKind::Full, &val, *endianness) + } + PrintSpecifier::Ipv4(endianness) => { + handle_sockaddr!(out, SockAddrKind::Ipv4AddrOnly, &val, *endianness) + } + PrintSpecifier::Ipv6 => { + handle_sockaddr!(out, SockAddrKind::Ipv6AddrOnly, &val, Endianness::Big) + } + + PrintSpecifier::Resource + | PrintSpecifier::DmaAddr + | PrintSpecifier::PhysAddr + | PrintSpecifier::EscapedBuffer + | PrintSpecifier::MacAddress + | PrintSpecifier::Uuid + | PrintSpecifier::Kobject + | PrintSpecifier::PageFlags + | PrintSpecifier::VmaFlags + | PrintSpecifier::GfpFlags + | PrintSpecifier::Clock + | PrintSpecifier::NetdevFeatures + | PrintSpecifier::Bitmap + | PrintSpecifier::Dentry + | 
PrintSpecifier::BlockDevice + | PrintSpecifier::VaFormat => match val.to_str() { + // Fallback on displaying as a string. We might get + // a string value from vbin-encoded data, where the + // kernel-rendered string was dumped in the buffer. + // This allows using trace_printk() with arguments + // that are passed by reference and then formatted + // by the kernel, ready to be displayed by us. + Some(s) => print_str(s), + None => match val { + Value::U64Scalar(x) => print_integral!(x, PrintSpecifier::Hex), + Value::I64Scalar(x) => print_integral!(x, PrintSpecifier::Hex), + Value::Raw(_typ, arr) => { + print_hex_buffer!(out, arr.iter(), HexBufferSeparator::C) + } + _ => Err(PrintError::SpecifierNotHandled(print_spec.clone()))?, + }, + }, + }?) + }; + + let mut res = Ok(()); + for atom in &self.atoms { + match atom { + PrintAtom::Fixed(s) => write!(out, "{s}")?, + PrintAtom::Variable { + print_spec, + flags, + width, + precision, + .. + } => { + if let Err(err) = print_variable(out, print_spec, flags, width, precision) { + // Write the error message inplace in the output and then return the last + // one as well. If we are printing to stdout, this allows getting values + // for the other fields and then let the caller decide whether the + // processing should be interrupted. + write!(out, "<{err}>")?; + res = Err(err); + } + } + } + } + res + } +} + +/// Format specifier as understood by vbin_printf() +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum VBinSpecifier { + U8, + I8, + + U16, + I16, + + U32, + I32, + + U64, + I64, + + Str, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum PrintWidth { + Unmodified, + Fixed(usize), + Dynamic, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum PrintPrecision { + Unmodified, + Fixed(usize), + Dynamic, +} + +bitflags! 
{ + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] + pub struct PrintFlags: u8 { + // # + const BasePrefix = 1; + + // - + const LeftJustify = 2; + + // + + const PositiveSign = 4; + + // space + const SignPlaceholder = 8; + + // 0 + const ZeroPad = 16; + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum HexBufferSeparator { + C, + D, + N, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum PrintSpecifier { + Hex, + UpperHex, + Dec, + Oct, + Str, + + Symbol, + SymbolWithOffset, + + // %*ph + HexBuffer(HexBufferSeparator), + + Resource, + DmaAddr, + PhysAddr, + EscapedBuffer, + MacAddress, + Ipv4(Endianness), + Ipv6, + IpGeneric(Endianness), + Uuid, + Kobject, + PageFlags, + VmaFlags, + GfpFlags, + Clock, + NetdevFeatures, + Bitmap, + Dentry, + BlockDevice, + VaFormat, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum PrintAtom { + Fixed(String), + Variable { + vbin_spec: VBinSpecifier, + print_spec: PrintSpecifier, + flags: PrintFlags, + width: PrintWidth, + precision: PrintPrecision, + }, +} + +impl PrintAtom { + #[inline] + pub(crate) fn new_variable( + _abi: &Abi, + vbin_spec: VBinSpecifier, + print_spec: PrintSpecifier, + flags: PrintFlags, + width: PrintWidth, + precision: PrintPrecision, + ) -> Self { + PrintAtom::Variable { + vbin_spec, + print_spec, + flags: PrintFlags::from_iter(flags), + width, + precision, + } + } + + pub(crate) fn zip_atoms< + 'atom, + T, + I1: Iterator, + I2: Iterator, + >( + mut args: I1, + mut atoms: I2, + ) -> impl IntoIterator)> { + let mut count = 0; + core::iter::from_fn(move || { + let arg = args.next()?; + if count > 0 { + count -= 1; + Some((arg, None)) + } else { + loop { + match atoms.next() { + Some(PrintAtom::Fixed(_)) => continue, + curr @ Some(PrintAtom::Variable { + ref width, + ref precision, + .. 
+ }) => { + count = 0; + count += match width { + PrintWidth::Dynamic => 1, + _ => 0, + }; + count += match precision { + PrintPrecision::Dynamic => 1, + _ => 0, + }; + break Some((arg, curr)); + } + None => break Some((arg, None)), + } + } + } + }) + } +} + +#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum PrintFmtError { + #[error("Expected string literal: {0:?}")] + NotAStringLiteral(Expr), + + #[error("Could not parse the printk format string: {0}")] + CParseError(Box), + + #[error("Could not decode string as utf-8: {0}")] + DecodeUtf8(Box), + + #[error("Illegal specifier: {0}")] + IllegalSpecifier(String), +} + +impl, I2: AsRef<[u8]>> FromParseError> + for PrintFmtError +{ + fn from_parse_error(input: I, err: &nom::error::VerboseError) -> Self { + PrintFmtError::CParseError(Box::new(CParseError::ParseError(VerboseParseError::new( + input, err, + )))) + } +} + +impl> FromParseError for PrintFmtError { + fn from_parse_error(input: I, _err: &()) -> Self { + PrintFmtError::CParseError(Box::new(CParseError::ParseError( + VerboseParseError::from_input(input), + ))) + } +} + +convert_err_impl!(Utf8Error, DecodeUtf8, PrintFmtError); + +// This function has been split out and unnecessary levels of alt((...)) have +// been added to avoid hitting a compile time blowup (from <2s to type check to +// 25s, also leading to an absolute disaster in release profile) +fn specifier<'a, 'abi, E>( + abi: &'abi Abi, +) -> impl nom::Parser<&'a [u8], Result<(VBinSpecifier, PrintSpecifier), PrintFmtError>, E> + 'abi +where + E: 'abi + + ParseError<&'a [u8]> + + ContextError<&'a [u8]> + + FromExternalError<&'a [u8], PrintFmtError> + + Debug, + 'a: 'abi, +{ + let (vbin_long, vbin_ulong) = match abi.long_size { + LongSize::Bits32 => (VBinSpecifier::I32, VBinSpecifier::U32), + LongSize::Bits64 => (VBinSpecifier::I64, VBinSpecifier::U64), + }; + let vbin_char = match abi.char_signedness { + Signedness::Unsigned => VBinSpecifier::U8, + Signedness::Signed 
=> VBinSpecifier::I8, + }; + + let ip_endianness = || { + alt((char('h'), char('n'), char('b'), char('l'))).map(|letter| match letter { + 'h' => abi.endianness, + 'b' | 'n' => Endianness::Big, + 'l' => Endianness::Little, + _ => panic!("Unknown endianness"), + }) + }; + + move |input| { + alt(( + char('s').map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Str))), + context( + "integer", + alt(( + alt(( + alt(( + tag("hhu").map(|_| Ok((VBinSpecifier::U16, PrintSpecifier::Dec))), + tag("hhx").map(|_| Ok((VBinSpecifier::U16, PrintSpecifier::Hex))), + tag("hhX").map(|_| Ok((VBinSpecifier::U16, PrintSpecifier::UpperHex))), + tag("hho").map(|_| Ok((VBinSpecifier::U16, PrintSpecifier::Oct))), + )), + alt(( + tag("hhd").map(|_| Ok((VBinSpecifier::I16, PrintSpecifier::Dec))), + tag("hhi").map(|_| Ok((VBinSpecifier::I16, PrintSpecifier::Dec))), + tag("hh").map(|_| Ok((VBinSpecifier::I16, PrintSpecifier::Dec))), + )), + )), + alt(( + alt(( + tag("hu").map(|_| Ok((VBinSpecifier::U8, PrintSpecifier::Dec))), + tag("hx").map(|_| Ok((VBinSpecifier::U8, PrintSpecifier::Hex))), + tag("hX").map(|_| Ok((VBinSpecifier::U8, PrintSpecifier::UpperHex))), + tag("ho").map(|_| Ok((VBinSpecifier::U8, PrintSpecifier::Oct))), + )), + alt(( + tag("hd").map(|_| Ok((VBinSpecifier::I8, PrintSpecifier::Dec))), + tag("hi").map(|_| Ok((VBinSpecifier::I8, PrintSpecifier::Dec))), + char('h').map(|_| Ok((VBinSpecifier::I8, PrintSpecifier::Dec))), + char('c').map(|_| Ok((vbin_char.clone(), PrintSpecifier::Str))), + )), + )), + alt(( + alt((char('d'), char('i'))) + .map(|_| Ok((VBinSpecifier::I32, PrintSpecifier::Dec))), + char('u').map(|_| Ok((VBinSpecifier::U32, PrintSpecifier::Dec))), + char('o').map(|_| Ok((VBinSpecifier::U32, PrintSpecifier::Oct))), + char('x').map(|_| Ok((VBinSpecifier::U32, PrintSpecifier::Hex))), + char('X').map(|_| Ok((VBinSpecifier::U32, PrintSpecifier::UpperHex))), + )), + alt(( + tag("ld"), + tag("li"), + tag("Ld"), + tag("Li"), + tag("z"), + tag("zd"), + )) + .map(|_| 
Ok((vbin_long.clone(), PrintSpecifier::Dec))), + alt(( + alt((tag("lu"), tag("Lu"), tag("zu"))) + .map(|_| Ok((vbin_ulong.clone(), PrintSpecifier::Dec))), + alt((tag("lx"), tag("Lx"), tag("zx"))) + .map(|_| Ok((vbin_ulong.clone(), PrintSpecifier::Hex))), + alt((tag("lX"), tag("LX"), tag("zX"))) + .map(|_| Ok((vbin_ulong.clone(), PrintSpecifier::UpperHex))), + alt((tag("lo"), tag("Lo"), tag("zo"))) + .map(|_| Ok((vbin_ulong.clone(), PrintSpecifier::Oct))), + )), + alt(( + alt((tag("lld"), tag("lli"))) + .map(|_| Ok((VBinSpecifier::I64, PrintSpecifier::Dec))), + tag("llu").map(|_| Ok((VBinSpecifier::U64, PrintSpecifier::Dec))), + tag("llx").map(|_| Ok((VBinSpecifier::U64, PrintSpecifier::Hex))), + tag("llX").map(|_| Ok((VBinSpecifier::U64, PrintSpecifier::UpperHex))), + tag("llo").map(|_| Ok((VBinSpecifier::U64, PrintSpecifier::Oct))), + )), + )), + ), + preceded( + char('p'), + cut(context( + "pointer", + alt(( + // Some pointers encoded as a pre-rendered string in the + // vbin_printf buffer, so they use the VBinSpecifier::Str . + // Others are regular pointers and can be looked up in + // kallsyms. 
+ alt(( + alt(( + alt((char('f'), char('s'))) + .map(|_| Ok((vbin_ulong.clone(), PrintSpecifier::Symbol))), + alt((tag("F"), tag("SR"), tag("S"))).map(|_| { + Ok((vbin_ulong.clone(), PrintSpecifier::SymbolWithOffset)) + }), + tag("B").map(|_| { + Ok((VBinSpecifier::Str, PrintSpecifier::SymbolWithOffset)) + }), + )), + alt(( + alt((char('r'), char('R'))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Resource))), + preceded( + char('a'), + alt(( + char('d').map(|_| { + Ok((VBinSpecifier::Str, PrintSpecifier::DmaAddr)) + }), + opt(char('p')).map(|_| { + Ok((VBinSpecifier::Str, PrintSpecifier::PhysAddr)) + }), + )), + ), + preceded( + char('E'), + many0(alt(( + char('a'), + char('c'), + char('n'), + char('o'), + char('p'), + char('s'), + ))), + ) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::EscapedBuffer))), + )), + alt(( + preceded( + char('h'), + alt(( + char('C').map(|_| HexBufferSeparator::C), + char('D').map(|_| HexBufferSeparator::D), + char('N').map(|_| HexBufferSeparator::N), + )), + ) + .map(|sep| { + Ok((VBinSpecifier::Str, PrintSpecifier::HexBuffer(sep))) + }), + preceded(char('M'), opt(alt((char('R'), char('F'))))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::MacAddress))), + preceded(char('m'), opt(char('R'))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::MacAddress))), + )), + alt(( + separated_pair( + alt((char('i'), char('I'))), + char('4'), + opt(ip_endianness()), + ) + .map(|(_, endianness)| { + let endianness = endianness.unwrap_or(Endianness::Big); + Ok((VBinSpecifier::Str, PrintSpecifier::Ipv4(endianness))) + }), + alt((tag("I6"), tag("i6"), tag("I6c"))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Ipv6))), + preceded( + alt((char('i'), char('I'))).map(|_| ()), + preceded( + char('S'), + many0(alt(( + alt((char('p'), char('f'), char('s'), char('c'))) + .map(|_| None), + ip_endianness().map(Some), + ))), + ), + ) + .map(|flags| { + let endianness = flags + .into_iter() + .flatten() + .last() + .unwrap_or(Endianness::Big); 
+ Ok((VBinSpecifier::Str, PrintSpecifier::IpGeneric(endianness))) + }), + )), + preceded(char('U'), alt((char('b'), char('B'), char('l'), char('L')))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Uuid))), + tuple(( + alt((char('d'), char('D'))), + opt(alt((char('2'), char('3'), char('4')))), + )) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Dentry))), + alt(( + char('g') + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::BlockDevice))), + char('V') + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::VaFormat))), + preceded( + tag("OF"), + opt(alt(( + char('f'), + char('n'), + char('p'), + char('P'), + char('c'), + char('C'), + char('F'), + ))), + ) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Kobject))), + )), + alt(( + preceded(char('C'), opt(alt((char('n'), char('r'))))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Clock))), + preceded(char('b'), opt(char('l'))) + .map(|_| Ok((VBinSpecifier::Str, PrintSpecifier::Bitmap))), + preceded( + char('G'), + alt(( + char('p').map(|_| PrintSpecifier::PageFlags), + char('g').map(|_| PrintSpecifier::GfpFlags), + char('v').map(|_| PrintSpecifier::VmaFlags), + )), + ) + .map(|specifier| Ok((VBinSpecifier::Str, specifier))), + tag("NF").map(|_| { + Ok((VBinSpecifier::Str, PrintSpecifier::NetdevFeatures)) + }), + )), + )), + // Simple pointers encoded as integers in vbin_printf buffer + alt((char('x').map(|_| ()), char('K').map(|_| ()), success(()))) + .map(|_x| Ok((vbin_ulong.clone(), PrintSpecifier::Hex))), + )), + )), + ), + alt((char('n'), char('e'), char('f'), char('g'), char('a'))) + .map(|c| Err(PrintFmtError::IllegalSpecifier(String::from_iter([c])))), + )) + .parse(input) + } +} + +// Formats documented there: +// https://www.kernel.org/doc/Documentation/printk-formats.txt +// Plus some specifiers that are undocumented +fn print_fmt_parser<'a, 'abi, E>( + abi: &'abi Abi, +) -> impl nom::Parser<&'a [u8], PrintFmtStr, E> + 'abi +where + E: 'abi + + ParseError<&'a [u8]> + + ContextError<&'a [u8]> + + 
FromExternalError<&'a [u8], PrintFmtError> + + Debug, + 'a: 'abi, +{ + move |input| { + let (_vbin_long, _vbin_ulong) = match abi.long_size { + LongSize::Bits32 => (VBinSpecifier::I32, VBinSpecifier::U32), + LongSize::Bits64 => (VBinSpecifier::I64, VBinSpecifier::U64), + }; + + let flags = || { + context( + "flag", + alt(( + char('-').map(|_| PrintFlags::LeftJustify), + char('+').map(|_| PrintFlags::PositiveSign), + char(' ').map(|_| PrintFlags::SignPlaceholder), + char('#').map(|_| PrintFlags::BasePrefix), + char('0').map(|_| PrintFlags::ZeroPad), + )), + ) + }; + + let precision = || { + alt(( + preceded( + char('.'), + context( + "precision", + alt(( + map_res_cut(preceded(char('*'), specifier(abi)), |spec| { + let (vbin_spec, print_spec) = spec?; + Ok((vbin_spec, print_spec, PrintPrecision::Dynamic)) + }), + map_res_cut(tuple((opt(txt_u64), specifier(abi))), |(width, spec)| { + let (vbin_spec, print_spec) = spec?; + // No value after the dot is same as an explicit 0 + let width = width.unwrap_or(0); + let width = width.try_into().unwrap(); + Ok((vbin_spec, print_spec, PrintPrecision::Fixed(width))) + }), + )), + ), + ), + map_res_cut(specifier(abi), |spec| { + let (vbin_spec, print_spec) = spec?; + Ok((vbin_spec, print_spec, PrintPrecision::Unmodified)) + }), + )) + }; + + let width = || { + alt(( + context( + "width", + alt(( + map_res_cut( + preceded(char('*'), precision()), + |(vbin_spec, print_spec, precision)| { + Ok((vbin_spec, print_spec, precision, PrintWidth::Dynamic)) + }, + ), + map_res_cut( + tuple((txt_u64, precision())), + |(width, (vbin_spec, print_spec, precision))| { + Ok(( + vbin_spec, + print_spec, + precision, + PrintWidth::Fixed(width.try_into().unwrap()), + )) + }, + ), + )), + ), + map_res_cut(precision(), |(vbin_spec, print_spec, precision)| { + Ok((vbin_spec, print_spec, precision, PrintWidth::Unmodified)) + }), + )) + }; + + let mut parser = context( + "printk format string", + map_res_cut( + many0(alt(( + preceded( + char('%'), + 
context( + "specifier", + alt(( + char('%').map(|_| PrintAtom::Fixed("%".into())), + tuple((many1(flags()), width())).map( + |(flags, (vbin_spec, print_spec, precision, width))| { + let flags = PrintFlags::from_iter(flags); + PrintAtom::new_variable( + abi, vbin_spec, print_spec, flags, width, precision, + ) + }, + ), + width().map(|(vbin_spec, print_spec, precision, width)| { + PrintAtom::new_variable( + abi, + vbin_spec, + print_spec, + PrintFlags::empty(), + width, + precision, + ) + }), + )), + ), + ), + context( + "fixed", + map_res_cut(is_not("%"), |s: &[u8]| { + Ok(PrintAtom::Fixed(from_utf8(s)?.into())) + }), + ), + ))), + |atoms: Vec| -> Result { + // Merge consecutive PrintAtom::Fixed together + let mut merged = Vec::with_capacity(atoms.len()); + atoms + .iter() + .group_by(|x| matches!(x, PrintAtom::Fixed(_))) + .into_iter() + .map(|(key, group)| { + if key { + let merged_s = group + .map(|x| match x { + PrintAtom::Fixed(s) => s, + _ => panic!("Expected fixed atom"), + }) + .collect(); + merged.push(PrintAtom::Fixed(merged_s)) + } else { + merged.extend(group.cloned()) + } + }) + .for_each(drop); + + Ok(PrintFmtStr { + atoms: merged, + vbin_decoders: OnceCell::new(), + }) + }, + ), + ); + parser.parse(input) + } +} + +#[inline] +pub fn parse_print_fmt(header: &Header, fmt: &[u8]) -> Result { + print_fmt_parser::>>(header.kernel_abi()) + .parse_finish(fmt) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{header::Endianness, parser::tests::test_parser}; + + #[test] + fn print_fmt_test() { + let abi = Abi { + long_size: LongSize::Bits64, + endianness: Endianness::Little, + char_signedness: Signedness::Unsigned, + }; + + let test = |src: &[u8], expected: Vec| { + let expected = PrintFmtStr { + vbin_decoders: OnceCell::new(), + atoms: expected, + }; + test_parser(expected, src, print_fmt_parser(&abi)) + }; + + macro_rules! new_variable_atom { + ($($args:expr),* $(,)?) 
=> { + PrintAtom::new_variable( + &abi, + $($args),* + ) + } + } + + test( + b"%u", + vec![new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + )], + ); + + test( + b"%03d", + vec![new_variable_atom!( + VBinSpecifier::I32, + PrintSpecifier::Dec, + PrintFlags::ZeroPad, + PrintWidth::Fixed(3), + PrintPrecision::Unmodified, + )], + ); + + test( + b"%#016x", + vec![new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Hex, + PrintFlags::BasePrefix | PrintFlags::ZeroPad, + PrintWidth::Fixed(16), + PrintPrecision::Unmodified, + )], + ); + test( + b"%#016.42x", + vec![new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Hex, + PrintFlags::BasePrefix | PrintFlags::ZeroPad, + PrintWidth::Fixed(16), + PrintPrecision::Fixed(42), + )], + ); + test( + b"%#016.*x", + vec![new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Hex, + PrintFlags::BasePrefix | PrintFlags::ZeroPad, + PrintWidth::Fixed(16), + PrintPrecision::Dynamic, + )], + ); + + test( + b"%px", + vec![new_variable_atom!( + VBinSpecifier::U64, + PrintSpecifier::Hex, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + )], + ); + + test( + b"%p", + vec![new_variable_atom!( + VBinSpecifier::U64, + PrintSpecifier::Hex, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + )], + ); + + test( + b"%pap", + vec![new_variable_atom!( + VBinSpecifier::Str, + PrintSpecifier::PhysAddr, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + )], + ); + + test( + b"%pad", + vec![new_variable_atom!( + VBinSpecifier::Str, + PrintSpecifier::DmaAddr, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + )], + ); + + test( + b"foo %pa", + vec![ + PrintAtom::Fixed("foo ".into()), + new_variable_atom!( + VBinSpecifier::Str, + PrintSpecifier::PhysAddr, + PrintFlags::empty(), + PrintWidth::Unmodified, + 
PrintPrecision::Unmodified, + ), + ], + ); + + test( + b"foo %u bar %px baz %%%pS", + vec![ + PrintAtom::Fixed("foo ".into()), + new_variable_atom!( + VBinSpecifier::U32, + PrintSpecifier::Dec, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(" bar ".into()), + new_variable_atom!( + VBinSpecifier::U64, + PrintSpecifier::Hex, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + PrintAtom::Fixed(" baz %".into()), + new_variable_atom!( + VBinSpecifier::U64, + PrintSpecifier::SymbolWithOffset, + PrintFlags::empty(), + PrintWidth::Unmodified, + PrintPrecision::Unmodified, + ), + ], + ); + } +} diff --git a/tools/trace-parser/traceevent/src/scratch.rs b/tools/trace-parser/traceevent/src/scratch.rs new file mode 100644 index 0000000000..3d7dd40adf --- /dev/null +++ b/tools/trace-parser/traceevent/src/scratch.rs @@ -0,0 +1,574 @@ +use core::{ + fmt::{Debug, Formatter}, + iter::Extend, + marker::PhantomData, + mem::ManuallyDrop, + ops::{Deref, DerefMut, RangeBounds}, + ptr::NonNull, +}; +use std::{io, sync::Arc}; + +use bumpalo::{boxed::Box as BumpaloBox, collections::Vec as BumpaloVec, Bump}; +use thread_local::ThreadLocal; + +pub struct ScratchAlloc { + pub(crate) bump: ThreadLocal, +} + +impl ScratchAlloc { + #[inline] + pub fn new() -> Self { + ScratchAlloc { + bump: ThreadLocal::new(), + } + } + + #[inline] + fn bump(&self) -> &Bump { + self.bump.get_or(Bump::new) + } + + #[inline] + pub fn reset(&mut self) { + for bump in self.bump.iter_mut() { + bump.reset() + } + } + + #[inline] + pub fn move_inside(&self, x: T) -> &mut T { + OwnedScratchBox::new_in(x, self).leak() + } +} + +impl AsRef for ScratchAlloc { + #[inline] + fn as_ref(&self) -> &Self { + self + } +} + +impl Default for ScratchAlloc { + fn default() -> Self { + Self::new() + } +} + +#[derive(Clone)] +pub enum ScratchBox<'a, T: 'a + ?Sized, A = &'a ScratchAlloc> +where + A: 'a + AsRef, +{ + Owned(OwnedScratchBox<'a, T, 
A>), + Borrowed(&'a T), + Arc(Arc), +} + +impl<'a, T: Debug + ?Sized, A> Debug for ScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + self.as_ref().fmt(f) + } +} + +impl<'a, T, A> ScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + pub fn new_in(value: T, alloc: A) -> ScratchBox<'a, T, A> { + ScratchBox::Owned(OwnedScratchBox::new_in(value, alloc)) + } + + #[inline] + pub fn into_static(self) -> ScratchBox<'static, T, A> + where + T: Clone, + { + match self { + ScratchBox::Owned(owned) => ScratchBox::Arc(Arc::new(owned.into_inner())), + ScratchBox::Borrowed(x) => ScratchBox::Arc(Arc::new(x.clone())), + ScratchBox::Arc(rc) => ScratchBox::Arc(rc), + } + } + + #[inline] + pub fn into_inner(self) -> T + where + T: Clone, + { + match self { + ScratchBox::Owned(owned) => owned.into_inner(), + ScratchBox::Borrowed(x) => x.clone(), + ScratchBox::Arc(rc) => rc.deref().clone(), + } + } +} + +impl<'a, T: ?Sized, A> Deref for ScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + match self { + ScratchBox::Owned(owned) => owned, + ScratchBox::Borrowed(x) => x, + ScratchBox::Arc(rc) => rc, + } + } +} + +impl<'a, T: ?Sized, A> AsRef for ScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + fn as_ref(&self) -> &T { + self.deref() + } +} + +impl<'a, T: ?Sized, A> From> for ScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + fn from(x: OwnedScratchBox<'a, T, A>) -> Self { + ScratchBox::Owned(x) + } +} + +impl<'a, T: PartialEq + ?Sized, A> PartialEq for ScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + fn eq(&self, other: &Self) -> bool { + self.deref().eq(other.deref()) + } +} + +impl<'a, T: Eq + ?Sized, A> Eq for ScratchBox<'a, T, A> where A: 'a + AsRef {} + +// Box +pub struct OwnedScratchBox<'a, T: 'a + ?Sized, A = &'a ScratchAlloc> { + // BumpaloBox<'_, T> is unfortunately invariant in T even 
though it should be covariant, like std::boxed::Box: + // https://github.com/fitzgen/bumpalo/issues/170 + // To work around that, we store the NonNull<> pointer that is designed for + // exactly that use case, and we convert back to BumpaloBox when it's + // required. + pub(crate) inner: NonNull, + pub(crate) alloc: A, + pub(crate) __phantom: PhantomData<&'a ()>, +} + +impl<'a, T, A> OwnedScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + pub fn new_in(value: T, alloc: A) -> OwnedScratchBox<'a, T, A> { + OwnedScratchBox { + inner: Self::alloc_nonnull(value, alloc.as_ref()), + alloc, + __phantom: PhantomData, + } + } + + pub fn with_capacity_in(capacity: usize, alloc: A) -> OwnedScratchBox<'a, [T], A> + where + T: Default + Clone, + { + let mut vec = BumpaloVec::with_capacity_in(capacity, alloc.as_ref().bump()); + // Fill the vec with placeholder value + vec.resize(capacity, Default::default()); + + // Convert to a BumpaloBox first to ensure that casting back the NonNull + // to a BumpaloBox is not UB. + let bbox: BumpaloBox<[T]> = vec.into(); + + OwnedScratchBox { + inner: NonNull::from(BumpaloBox::leak(bbox)), + alloc, + __phantom: PhantomData, + } + } + + pub fn from_slice(data: &[T], alloc: A) -> OwnedScratchBox<'a, [T], A> + where + T: Default + Clone, + { + let mut new = Self::with_capacity_in(data.len(), alloc); + new.deref_mut().clone_from_slice(data); + new + } + + #[inline] + pub fn into_inner(self) -> T { + BumpaloBox::into_inner(self.into_bumpalobox()) + } + + #[inline] + fn alloc_nonnull(value: T, alloc: &ScratchAlloc) -> NonNull { + let bbox = BumpaloBox::new_in(value, alloc.bump()); + NonNull::from(BumpaloBox::leak(bbox)) + } +} + +macro_rules! 
OwnedScratchBox_as_dyn { + ($expr:expr, $trait:path) => {{ + fn make( + expr: $crate::scratch::OwnedScratchBox<'_, T, A>, + ) -> $crate::scratch::OwnedScratchBox<'_, dyn $trait, A> { + let new = $crate::scratch::OwnedScratchBox { + alloc: ::core::clone::Clone::clone(&expr.alloc), + inner: ::core::ptr::NonNull::new( + // Use leak() so that we consume the box without freeing the + // value. + expr.inner.as_ptr() as *mut dyn $trait, + ) + .unwrap(), + __phantom: ::core::marker::PhantomData, + }; + // Ensure the destructor of the underlying value is never called, + // otherwise that would lead to a double free when the new box is + // dropped. + ::core::mem::forget(expr); + new + } + make($expr) + }}; +} +pub(crate) use OwnedScratchBox_as_dyn; + +/// Marker trait for types that don't have any Drop implementation, so not using +/// core::mem::forget() on them will not lead to any nasty effect (memory leak, +/// locks unreleased etc.). +pub trait NoopDrop {} +macro_rules! nodrop_impl { + ($($typ:ty),*) => { + $( + impl NoopDrop for $typ {} + )* + }; +} +nodrop_impl!(u8, i8, u16, i16, u32, i32, u64, i64, u128, i128, f32, f64, bool, str); +impl NoopDrop for [T] {} +impl NoopDrop for [T; N] {} +impl NoopDrop for &T {} +impl NoopDrop for &mut T {} + +impl<'a, T: ?Sized, A> OwnedScratchBox<'a, T, A> { + #[inline] + fn into_bumpalobox(self) -> BumpaloBox<'a, T> { + let this = ManuallyDrop::new(self); + // SAFETY: We own the pointer, it is not aliased anywhere. Also, the + // destructor will not run since we used ManuallyDrop + unsafe { BumpaloBox::from_raw(this.inner.as_ptr()) } + } + + #[inline] + pub fn leak(self) -> &'a mut T + where + T: NoopDrop, + { + BumpaloBox::leak(self.into_bumpalobox()) + } +} + +// SAFETY: If the allocator and the value are Send, then the boxed value is also +// Send. 
+unsafe impl<'a, T: Send + ?Sized, A> Send for OwnedScratchBox<'a, T, A> +where + T: Send, + A: Send, +{ +} + +// SAFETY: If the allocator and the value are Sync, then the boxed value is also +// Sync. +unsafe impl<'a, T, A> Sync for OwnedScratchBox<'a, T, A> +where + T: Sync + ?Sized, + A: Sync, +{ +} + +impl<'a, T: ?Sized, A> Drop for OwnedScratchBox<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: We own the pointer, it is not aliased anywhere. Also, it was + // created by leaking a BumpaloBox in the first place. + let ptr = self.inner.as_ptr(); + unsafe { + BumpaloBox::from_raw(ptr); + } + } +} + +impl<'a, T: ?Sized, A> Deref for OwnedScratchBox<'a, T, A> { + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + // SAFETY: the pointer is not aliased anywhere + unsafe { self.inner.as_ref() } + } +} + +impl<'a, T: ?Sized, A> DerefMut for OwnedScratchBox<'a, T, A> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: the pointer is not aliased anywhere + unsafe { self.inner.as_mut() } + } +} + +impl<'a, T: ?Sized, A> AsRef for OwnedScratchBox<'a, T, A> { + #[inline] + fn as_ref(&self) -> &T { + self.deref() + } +} + +impl<'a, T: ?Sized, A> AsMut for OwnedScratchBox<'a, T, A> { + #[inline] + fn as_mut(&mut self) -> &mut T { + self.deref_mut() + } +} + +impl<'a, T: Clone, A: Clone> Clone for OwnedScratchBox<'a, T, A> +where + A: 'a + AsRef, +{ + #[inline] + fn clone(&self) -> Self { + let value = Clone::clone(self.deref()); + OwnedScratchBox { + inner: Self::alloc_nonnull(value, self.alloc.as_ref()), + alloc: Clone::clone(&self.alloc), + __phantom: PhantomData, + } + } +} + +impl<'a, T: Debug + ?Sized, A> Debug for OwnedScratchBox<'a, T, A> { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + self.deref().fmt(f) + } +} +impl<'a, T: PartialEq + ?Sized, A> PartialEq for OwnedScratchBox<'a, T, A> { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.deref().eq(other.deref()) + } 
+} + +impl<'a, T: Eq + ?Sized, A> Eq for OwnedScratchBox<'a, T, A> {} + +// Vec +pub struct ScratchVec<'a, T: 'a>(BumpaloVec<'a, T>); + +impl<'a, T> ScratchVec<'a, T> { + #[inline] + pub fn new_in(alloc: &'a ScratchAlloc) -> Self { + ScratchVec(BumpaloVec::new_in(alloc.bump())) + } + + #[inline] + pub fn with_capacity_in(capacity: usize, alloc: &'a ScratchAlloc) -> Self { + ScratchVec(BumpaloVec::with_capacity_in(capacity, alloc.bump())) + } + + #[inline] + pub fn push(&mut self, value: T) { + self.0.push(value) + } + + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.0.reserve(additional) + } + + #[inline] + pub fn extend(&mut self, iter: I) + where + I: IntoIterator, + { + self.0.extend(iter) + } + + #[inline] + pub fn truncate(&mut self, len: usize) { + self.0.truncate(len) + } + + #[inline] + pub fn resize(&mut self, new_len: usize, value: T) + where + T: Clone, + { + self.0.resize(new_len, value) + } + #[inline] + pub fn leak(self) -> &'a mut [T] + where + T: NoopDrop, + { + self.0.into_bump_slice_mut() + } +} + +impl<'a, T> ScratchVec<'a, T> +where + T: Clone, +{ + #[inline] + pub fn extend_from_slice(&mut self, other: &[T]) { + self.0.extend_from_slice(other) + } +} + +impl<'a, T> ScratchVec<'a, T> +where + T: Copy, +{ + #[inline] + pub fn copy_from_slice(&mut self, other: &[T]) { + self.0.copy_from_slice(other) + } + + #[inline] + pub fn copy_within(&mut self, src: R, dst: usize) + where + R: RangeBounds, + T: Copy, + { + self.0.copy_within(src, dst) + } +} + +impl<'a, T> Deref for ScratchVec<'a, T> { + type Target = [T]; + + #[inline] + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a, T> DerefMut for ScratchVec<'a, T> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.deref_mut() + } +} + +impl<'a, T> AsRef<[T]> for ScratchVec<'a, T> { + #[inline] + fn as_ref(&self) -> &[T] { + self.deref() + } +} + +impl<'a, T> AsMut<[T]> for ScratchVec<'a, T> { + #[inline] + fn as_mut(&mut self) -> &mut [T] { + 
self.deref_mut() + } +} + +impl<'a, T: Clone> Clone for ScratchVec<'a, T> { + #[inline] + fn clone(&self) -> Self { + ScratchVec(self.0.clone()) + } +} + +impl<'a, T: Debug> Debug for ScratchVec<'a, T> { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> { + self.0.fmt(f) + } +} + +impl<'a> io::Write for ScratchVec<'a, u8> { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + self.0.extend_from_slice(buf); + Ok(buf.len()) + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +impl<'a, T> IntoIterator for ScratchVec<'a, T> { + type Item = T; + type IntoIter = IntoIterVec<'a, T>; + + // Required method + fn into_iter(self) -> Self::IntoIter { + IntoIterVec(self.0.into_iter()) + } +} + +pub struct IntoIterVec<'a, T>( as IntoIterator>::IntoIter); + +impl<'a, T> Iterator for IntoIterVec<'a, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'a, A> Extend for ScratchVec<'a, A> { + #[inline] + fn extend(&mut self, iter: T) + where + T: IntoIterator, + { + for x in iter { + self.push(x) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn variance_test() { + // As long as this compiles, we know that OwnedScratchBox<'a, T> will be + // covariant in 'a + #[allow(dead_code)] + fn subtype<'alloc_child, 'alloc_parent: 'alloc_child, 'child, 'parent: 'child>( + x: OwnedScratchBox<'alloc_parent, &'parent i32>, + ) -> OwnedScratchBox<'alloc_child, &'child i32> { + x + } + } +} diff --git a/tools/trace-parser/traceevent/src/str.rs b/tools/trace-parser/traceevent/src/str.rs new file mode 100644 index 0000000000..7fffc1b505 --- /dev/null +++ b/tools/trace-parser/traceevent/src/str.rs @@ -0,0 +1,265 @@ +use core::{ + borrow::Borrow, + cmp::Ordering, + convert::{AsMut, AsRef}, + fmt, + hash::{Hash, Hasher}, + 
ops::{Deref, DerefMut}, +}; +use std::sync::Arc; + +use smartstring::alias::String; + +use crate::{ + memo::Memo, + scratch::{OwnedScratchBox, OwnedScratchBox_as_dyn, ScratchAlloc}, +}; + +#[derive(Debug, Clone)] +pub struct Str<'a> { + pub(crate) inner: InnerStr<'a>, +} + +type StrProcedure<'a> = Memo< + String, + OwnedScratchBox<'a, dyn StringProducer>, + fn(&OwnedScratchBox) -> String, +>; + +#[derive(Debug, Clone)] +pub(crate) enum InnerStr<'a> { + Borrowed(&'a str), + Owned(String), + Arc(Arc), + Procedural(StrProcedure<'a>), +} + +impl<'a> Clone for OwnedScratchBox<'a, dyn StringProducer> { + #[inline] + fn clone(&self) -> Self { + self.clone_box(self.alloc) + } +} + +pub trait StringProducer: Send + Sync { + fn write(&self, out: &mut dyn fmt::Write); + fn clone_box<'a>(&self, alloc: &'a ScratchAlloc) -> OwnedScratchBox<'a, dyn StringProducer> + where + Self: 'a; +} + +// impl StringProducer for &dyn StringProducer { +// #[inline] +// fn write(&self, out: &mut dyn fmt::Write) { +// (*self).write(out) +// } + +// #[inline] +// fn clone_box<'a>(&self, alloc: &'a ScratchAlloc) -> OwnedScratchBox<'a, dyn StringProducer> +// where +// Self: 'a, +// { +// (*self).clone_box(alloc) +// } +// } + +impl StringProducer for F +where + F: Fn(&mut dyn fmt::Write) + Send + Sync + Clone, +{ + #[inline] + fn write(&self, out: &mut dyn fmt::Write) { + self(out) + } + + fn clone_box<'a>(&self, alloc: &'a ScratchAlloc) -> OwnedScratchBox<'a, dyn StringProducer> + where + Self: 'a, + { + let sbox = OwnedScratchBox::new_in(self.clone(), alloc); + OwnedScratchBox_as_dyn!(sbox, StringProducer) + } +} + +fn write_to_string(f: &OwnedScratchBox) -> String { + let mut new = String::new(); + f.write(&mut new); + new +} + +impl<'a> Str<'a> { + #[inline] + pub fn new_procedural(f: OwnedScratchBox<'a, T>) -> Self { + let f = OwnedScratchBox_as_dyn!(f, StringProducer); + Str { + inner: InnerStr::Procedural(Memo::new(f, write_to_string)), + } + } + #[inline] + pub fn new_borrowed(s: &'a str) 
-> Self { + Str { + inner: InnerStr::Borrowed(s), + } + } + + #[inline] + pub fn new_owned(s: String) -> Self { + Str { + inner: InnerStr::Owned(s), + } + } + + #[inline] + pub fn new_arc(s: Arc) -> Self { + Str { + inner: InnerStr::Arc(s), + } + } + + #[inline] + pub fn into_static(self) -> Str<'static> { + Str { + inner: InnerStr::Owned(match self.inner { + InnerStr::Owned(s) => s, + InnerStr::Borrowed(s) => (*s).into(), + InnerStr::Arc(s) => (&*s).into(), + InnerStr::Procedural(p) => p.into_owned(), + }), + } + } +} + +impl<'a> Deref for Str<'a> { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + match &self.inner { + InnerStr::Borrowed(s) => s, + InnerStr::Owned(s) => s, + InnerStr::Arc(s) => s, + InnerStr::Procedural(memo) => memo.deref(), + } + } +} + +impl<'a> DerefMut for Str<'a> { + #[inline] + fn deref_mut<'b>(&'b mut self) -> &'b mut Self::Target { + macro_rules! own { + ($s:expr) => {{ + let s: &str = $s.deref(); + *self = Str::new_owned(s.into()); + match self.inner { + InnerStr::Owned(ref mut s) => s, + _ => unreachable!(), + } + }}; + } + + macro_rules! cast_lifetime { + ($s:expr) => {{ + // SAFETY: This works around a borrow checker limitation, where + // simply returning s.deref_mut() would borrow "self" for the + // lifetime of the scope, preventing from modifying "self" in + // the other unrelated match arms. 
+ unsafe { core::mem::transmute::<&mut str, &'b mut str>($s) } + }}; + } + + match &mut self.inner { + InnerStr::Owned(s) => { + cast_lifetime!(s.deref_mut()) + } + InnerStr::Borrowed(s) => own!(s), + InnerStr::Arc(s) => own!(s), + InnerStr::Procedural(memo) => cast_lifetime!(memo.deref_mut()), + } + } +} + +impl<'a> AsRef for Str<'a> { + #[inline] + fn as_ref(&self) -> &str { + self.deref() + } +} + +impl<'a> AsMut for Str<'a> { + #[inline] + fn as_mut(&mut self) -> &mut str { + self.deref_mut() + } +} + +impl<'a> From<&'a str> for Str<'a> { + #[inline] + fn from(s: &'a str) -> Str<'a> { + Str { + inner: InnerStr::Borrowed(s), + } + } +} + +impl<'r, 'a: 'r> From<&'r Str<'a>> for &'r str { + #[inline] + fn from(s: &'r Str<'a>) -> &'r str { + s + } +} + +impl<'a> From<&Str<'a>> for String { + #[inline] + fn from(s: &Str<'a>) -> String { + match &s.inner { + InnerStr::Owned(s) => s.clone(), + InnerStr::Borrowed(s) => (*s).into(), + InnerStr::Arc(s) => s.deref().into(), + InnerStr::Procedural(memo) => memo.deref().clone(), + } + } +} + +impl<'a> PartialEq for Str<'a> { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl<'a> Eq for Str<'a> {} + +impl<'a> PartialOrd for Str<'a> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl<'a> Ord for Str<'a> { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } +} + +impl<'a> Hash for Str<'a> { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(self.deref(), state) + } +} + +impl<'a> Borrow for Str<'a> { + #[inline] + fn borrow(&self) -> &str { + self + } +} + +impl<'a> fmt::Display for Str<'a> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + fmt::Display::fmt(self.deref(), f) + } +}