From 0ba355221e85491529ea116a847bf41a2a45896e Mon Sep 17 00:00:00 2001 From: Joshua Liebow-Feeser Date: Wed, 6 Sep 2023 18:57:18 +0000 Subject: [PATCH] Add KnownLayout::validate_cast TODO: - Tests - Do we need to think about `isize` overflow in order to prevent user code from being unsound (ie, generating invalid calls to `<*const _>::add`)? My current thinking is that we don't - it's already invalid to construct a memory region that overflows `isize`, and so it'd never be valid to call `validate_cast` in that case. Co-authored-by: Jack Wrenn --- Cargo.toml | 2 + src/lib.rs | 337 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 339 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 117ebed0d7..cfc32c85d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,8 @@ optional = true zerocopy-derive = { version = "=0.7.3", path = "zerocopy-derive" } [dev-dependencies] +assert_matches = "1.5" +itertools = "0.11" rand = { version = "0.8.5", features = ["small_rng"] } rustversion = "1.0" static_assertions = "1.1" diff --git a/src/lib.rs b/src/lib.rs index 4da4e3db76..558e8d931e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,12 +219,21 @@ pub struct DstLayout { /// `size_of::()`. For DSTs, the size represents the size of the type /// when the trailing slice field contains 0 elements. /// - For all types, the alignment represents the alignment of the type. + // TODO: If we end up replacing this with separate size and alignment to + // make Kani happy, file an issue to eventually adopt the stdlib's + // `Alignment` type trick. _base_layout: Layout, /// For sized types, `None`. For DSTs, the size of the element type of the /// trailing slice. _trailing_slice_elem_size: Option, } +#[cfg_attr(test, derive(Copy, Clone, Debug))] +enum _CastType { + _Prefix, + _Suffix, +} + impl DstLayout { /// Constructs a `DstLayout` which describes `T`. /// @@ -251,6 +260,202 @@ impl DstLayout { _trailing_slice_elem_size: Some(mem::size_of::()), } } + + /// Validates that a cast is sound from a layout perspective. + /// + /// Validates that the size and alignment requirements of a type with the + /// layout described in `self` would not be violated by performing a + /// `cast_type` cast from a pointer with address `addr` which refers to a + /// memory region of size `bytes_len`. + /// + /// If the cast is valid, `validate_cast` returns `(elems, split_at)`. If + /// `self` describes a dynamically-sized type, then `elems` is the maximum + /// number of trailing slice elements for which a cast would be valid (for + /// sized types, `elem` is meaningless and should be ignored). `split_at` is + /// the index at which to split the memory region in order for the prefix + /// (suffix) to contain the result of the cast, and in order for the + /// remaining suffix (prefix) to contain the leftover bytes. + /// + /// There are three conditions under which a cast can fail: + /// - The smallest possible value for the type is larger than the provided + /// memory region + /// - A prefix cast is requested, and `addr` does not satisfy `self`'s + /// alignment requirement + /// - A suffix cast is requested, and `addr + bytes_len` does not satisfy + /// `self`'s alignment requirement (as a consequence, since the size of + /// the trailing slice element is a multiple of the alignment, no length + /// for the trailing slice will result in a starting address which is + /// properly aligned) + /// + /// # Safety + /// + /// The caller may assume that this implementation is correct, and may rely + /// on that assumption for the soundness of their code. In particular, the + /// caller may assume that: + /// - A pointer to the type (for dynamically sized types, this includes + /// `elems` as its pointer metadata) describes an object of size `size <= + /// bytes_len` + /// - If this is a prefix cast, `addr` satisfies `self`'s alignment + /// - If this is a suffix cast, `addr + bytes_len - size` satisfies `self`'s + /// alignment + /// + /// # Panics + /// + /// If `addr + bytes_len` overflows `usize`, `validate_cast` may panic, or + /// it may return incorrect results. No guarantees are made about when + /// `validate_cast` will panic. The caller should not rely on + /// `validate_cast` panicking in any particular condition, even if + /// `debug_assertions` are enabled. + const fn _validate_cast( + &self, + addr: usize, + bytes_len: usize, + cast_type: _CastType, + ) -> Option<(usize, usize)> { + // `debug_assert!`, but with `#[allow(clippy::arithmetic_side_effects)]`. + macro_rules! __debug_assert { + ($e:expr $(, $msg:expr)?) => { + debug_assert!({ + #[allow(clippy::arithmetic_side_effects)] + let e = $e; + e + } $(, $msg)?); + }; + } + + // Note that, in practice, `elem_size` is always a compile-time + // constant. We do this check earlier than needed to ensure that we + // always panic as a result of bugs in the program (such as calling this + // function on an invalid type) instead of allowing this panic to be + // hidden if the cast would have failed anyway for runtime reasons (such + // as a too-small memory region). + // + // TODO(#67): Once our MSRV is 1.65, use let-else: + // https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html#let-else-statements + let elem_size = match self._trailing_slice_elem_size { + Some(elem_size) => match NonZeroUsize::new(elem_size) { + Some(elem_size) => Some(elem_size), + None => panic!("attempted to cast to slice type with zero-sized element"), + }, + None => None, + }; + + // Precondition + __debug_assert!(addr.checked_add(bytes_len).is_some(), "`addr` + `bytes_len` > usize::MAX"); + + // We check alignment for `addr` (for prefix casts) or `addr + + // bytes_len` (for suffix casts). For a prefix cast, the correctness of + // this check is trivial - `addr` is the address the object will live + // at. + // + // For a suffix cast, we know that all valid sizes for the type are a + // multiple of the alignment. Thus, a validly-sized instance which lives + // at a validly-aligned address must also end at a validly-aligned + // address. Thus, if the end address for a suffix cast (`addr + + // bytes_len`) is not aligned, then no valid start address will be + // aligned either. + let offset = match cast_type { + _CastType::_Prefix => 0, + _CastType::_Suffix => bytes_len, + }; + + // Addition is guaranteed not to overflow because `offset <= bytes_len`, + // and `addr + bytes_len <= usize::MAX` is a precondition of this + // method. Modulus is guaranteed not to divide by 0 because `.align()` + // guarantees that its return value is non-zero. + #[allow(clippy::arithmetic_side_effects)] + if (addr + offset) % self._base_layout.align() != 0 { + return None; + } + + let base_size = self._base_layout.size(); + + // LEMMA 0: max_slice_bytes + base_size == bytes_len + // + // LEMMA 1: base_size <= bytes_len: + // - If `base_size > bytes_len`, `bytes_len.checked_sub(base_size)` + // returns `None`, and we return. + // + // TODO(#67): Once our MSRV is 1.65, use let-else: + // https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html#let-else-statements + let max_slice_bytes = if let Some(max_byte_slice) = bytes_len.checked_sub(base_size) { + max_byte_slice + } else { + return None; + }; + + // Lemma 0 + __debug_assert!(max_slice_bytes + base_size == bytes_len); + + // Lemma 1 + __debug_assert!(base_size <= bytes_len); + + let (elems, self_bytes) = if let Some(elem_size) = elem_size { + // Guaranteed not to divide by 0 because `elem_size` is a + // `NonZeroUsize`. + #[allow(clippy::arithmetic_side_effects)] + let elems = max_slice_bytes / elem_size.get(); + + // NOTE: Another option for this step in the algorithm is to set + // `slice_bytes = elems * elem_size`. However, using multiplication + // causes Kani to choke. In practice, the compiler is likely to + // generate identical machine code in both cases. Note that this + // divide-then-mod approach is trivially optimizable into a single + // operation that computes both the quotient and the remainder. + + // First line is guaranteed not to mod by 0 because `elem_size` is a + // `NonZeroUsize`. Second line is guaranteed not to underflow + // because `rem <= max_slice_bytes` thanks to the mod operation. + // + // LEMMA 2: slice_bytes <= max_slice_bytes + #[allow(clippy::arithmetic_side_effects)] + let rem = max_slice_bytes % elem_size.get(); + #[allow(clippy::arithmetic_side_effects)] + let slice_bytes = max_slice_bytes - rem; + + // Lemma 2 + __debug_assert!(slice_bytes <= max_slice_bytes); + + // Guaranteed not to overflow: + // - max_slice_bytes + base_size == bytes_len (lemma 0) + // - slice_bytes <= max_slice_bytes (lemma 2) + // - slice_bytes + base_size <= bytes_len (substitution) ------+ + // - bytes_len <= usize::MAX (bytes_len: usize) | + // - slice_bytes + base_size <= usize::MAX (substitution) | + // | + // LEMMA 3: self_bytes <= bytes_len: | + // - slice_bytes + base_size <= bytes_len <--------------------------+ (reused for lemma) + // - slice_bytes <= bytes_len + #[allow(clippy::arithmetic_side_effects)] + let self_bytes = base_size + slice_bytes; + + // Lemma 3 + __debug_assert!(self_bytes <= bytes_len); + + (elems, self_bytes) + } else { + (0, base_size) + }; + + // LEMMA 4: self_bytes <= bytes_len: + // - `if` branch returns `self_bytes`; lemma 3 guarantees `self_bytes <= + // bytes_len` + // - `else` branch returns `base_size`; lemma 1 guarantees `base_size <= + // bytes_len` + + // Lemma 4 + __debug_assert!(self_bytes <= bytes_len); + + let split_at = match cast_type { + _CastType::_Prefix => self_bytes, + // Guaranteed not to underflow because `self_bytes <= bytes_len` + // (lemma 4). + #[allow(clippy::arithmetic_side_effects)] + _CastType::_Suffix => bytes_len - self_bytes, + }; + + Some((elems, split_at)) + } } /// A trait which carries information about a type's layout that is used by the @@ -2738,6 +2943,138 @@ mod tests { } } + // This test takes a long time when running under Miri, so we skip it in + // that case. This is acceptable because this is a logic test that doesn't + // attempt to expose UB. + #[test] + #[cfg_attr(miri, ignore)] + fn test_validate_cast() { + fn layout( + base_size: usize, + align: usize, + _trailing_slice_elem_size: Option, + ) -> DstLayout { + DstLayout { + _base_layout: Layout::from_size_align(base_size, align).unwrap(), + _trailing_slice_elem_size, + } + } + + /// This macro accepts arguments in the form of: + /// + /// layout(_, _, _).validate_cast(_, _, _), Ok(Some((_, _))) + /// | | | | | | | | + /// base_size ----+ | | | | | | | + /// align -----------+ | | | | | | + /// trailing_size ------+ | | | | | + /// addr --------------------------------+ | | | | + /// bytes_len ------------------------------+ | | | + /// cast_type ---------------------------------+ | | + /// elems --------------------------------------------------+ | + /// split_at --------------------------------------------------+ + /// + /// Each argument can either be an iterator or a wildcard. Each + /// wildcarded variable is implicitly replaced by an iterator over a + /// representative sample of values for that variable. Each `test!` + /// invocation iterates over every combination of values provided by + /// each variable's iterator (ie, the cartesian product) and validates + /// that the results are expected. + /// + /// The final argument uses the same syntax, but it has a different + /// meaning: + /// - If it is `Ok(pat)`, then the pattern `pat` is supplied to + /// `assert_matches!` to validate the computed result for each + /// combination of input values. + /// - If it is `Err(msg)`, then `test!` validates that the call to + /// `validate_cast` panics with the given panic message. + /// + /// Note that the meta-variables that match these variables have the + /// `tt` type, and some valid expressions are not valid `tt`s (such as + /// `a..b`). In this case, wrap the expression in parentheses, and it + /// will become valid `tt`. + macro_rules! test { + ( + layout($base_size:tt, $align:tt, $trailing_size:tt) + .validate_cast($addr:tt, $bytes_len:tt, $cast_type:tt), $expect:pat $(,)? + ) => { + itertools::iproduct!( + test!(@generate_usize $base_size), + test!(@generate_align $align), + test!(@generate_opt_usize $trailing_size), + test!(@generate_usize $addr), + test!(@generate_usize $bytes_len), + test!(@generate_cast_type $cast_type) + ).for_each(|(base_size, align, trailing_size, addr, bytes_len, cast_type)| { + let actual = std::panic::catch_unwind(|| { + layout(base_size, align, trailing_size)._validate_cast(addr, bytes_len, cast_type) + }).map_err(|d| { + *d.downcast::<&'static str>().expect("expected string panic message").as_ref() + }); + assert_matches::assert_matches!( + actual, $expect, + "layout({base_size}, {align}, {trailing_size:?}).validate_cast({addr}, {bytes_len}, {cast_type:?})", + ); + }); + }; + (@generate_usize _) => { 0..8 }; + (@generate_align _) => { [1, 2, 4, 8, 16] }; + (@generate_opt_usize _) => { [None].into_iter().chain((0..8).map(Some).into_iter()) }; + (@generate_cast_type _) => { [_CastType::_Prefix, _CastType::_Suffix] }; + (@generate_cast_type $variant:ident) => { [_CastType::$variant] }; + // Some expressions need to be wrapped in parentheses in order to be + // valid `tt`s (required by the top match pattern). See the comment + // below for more details. This arm removes these parentheses to + // avoid generating an `unused_parens` warning. + (@$_:ident ($vals:expr)) => { $vals }; + (@$_:ident $vals:expr) => { $vals }; + } + + const EVENS: [usize; 5] = [0, 2, 4, 6, 8]; + const NZ_EVENS: [usize; 5] = [2, 4, 6, 8, 10]; + const ODDS: [usize; 5] = [1, 3, 5, 7, 9]; + + // base_size is too big for the memory region. + test!(layout((1..8), _, ((1..8).map(Some))).validate_cast(_, [0], _), Ok(None)); + test!(layout((2..8), _, ((1..8).map(Some))).validate_cast(_, [1], _), Ok(None)); + + // addr is unaligned for prefix cast + test!(layout(_, [2], [None]).validate_cast(ODDS, _, _Prefix), Ok(None)); + test!(layout(_, [2], (NZ_EVENS.map(Some))).validate_cast(ODDS, _, _Prefix), Ok(None)); + + // addr is aligned, but end of buffer is unaligned for suffix cast + test!(layout(_, [2], [None]).validate_cast(EVENS, ODDS, _Suffix), Ok(None)); + test!(layout(_, [2], (NZ_EVENS.map(Some))).validate_cast(EVENS, ODDS, _Suffix), Ok(None)); + + // TDOO: Success cases + + // Unfortunately, these constants cannot easily be used in the + // implementation of `validate_cast`, since `panic!` consumes a string + // literal, not an expression. + mod msgs { + pub(super) const TRAILING: &str = + "attempted to cast to slice type with zero-sized element"; + pub(super) const OVERFLOW: &str = "`addr` + `bytes_len` > usize::MAX"; + } + + // casts with ZST trailing element types are unsupported + test!(layout(_, _, [Some(0)]).validate_cast(_, _, _), Err(msgs::TRAILING),); + + // addr + bytes_len must not overflow usize + test!( + layout(_, [1], (NZ_EVENS.map(Some))).validate_cast([usize::MAX], (1..100), _), + Err(msgs::OVERFLOW) + ); + test!(layout(_, [1], [None]).validate_cast((1..100), [usize::MAX], _), Err(msgs::OVERFLOW)); + test!( + layout([1], [1], [None]).validate_cast( + [usize::MAX / 2 + 1, usize::MAX], + [usize::MAX / 2 + 1, usize::MAX], + _ + ), + Err(msgs::OVERFLOW) + ); + } + #[test] fn test_known_layout() { // Test that `$ty` and `ManuallyDrop<$ty>` have the expected layout.