From 866ecbcf849bae992d083129b45a75ef44860ee2 Mon Sep 17 00:00:00 2001 From: Patrick Mooney Date: Wed, 12 Jul 2023 10:49:36 -0500 Subject: [PATCH] Clean up NVMe PRP parsing and add tests --- lib/propolis/src/common.rs | 4 +- lib/propolis/src/hw/nvme/cmds.rs | 247 +++++++++++++++++++++++++++---- lib/propolis/src/vmm/mem.rs | 188 ++++++++++++++--------- 3 files changed, 343 insertions(+), 96 deletions(-) diff --git a/lib/propolis/src/common.rs b/lib/propolis/src/common.rs index aee4e9c2a..95fe3b3f2 100644 --- a/lib/propolis/src/common.rs +++ b/lib/propolis/src/common.rs @@ -336,11 +336,11 @@ impl RWOp<'_, '_> { } /// An address within a guest VM. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct GuestAddr(pub u64); /// A region of memory within a guest VM. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct GuestRegion(pub GuestAddr, pub usize); impl Add for GuestAddr { diff --git a/lib/propolis/src/hw/nvme/cmds.rs b/lib/propolis/src/hw/nvme/cmds.rs index 961dea0c3..aca561f66 100644 --- a/lib/propolis/src/hw/nvme/cmds.rs +++ b/lib/propolis/src/hw/nvme/cmds.rs @@ -602,12 +602,12 @@ impl PrpIter<'_> { assert!(self.error.is_none()); // PRP Entry Layout - // | 63 . . . . . . . . . . . . . . . . . . . n + 1 | n . . . . . . 2 | 1 0 | - // | page base address | offset | 0 0 | + // | 63 . . . . . . . . . . . . . . . n + 1 | n . . . . . . 2 | 1 0 | + // | page base address | offset | 0 0 | let (addr, size, next) = match self.next { PrpNext::Prp1 => { - // The first PRP entry contained within the command may have a non-zero offset - // within the memory page. + // The first PRP entry contained within the command may have a + // non-zero offset within the memory page. let offset = self.prp1 & PAGE_OFFSET as u64; let size = u64::min(PAGE_SIZE as u64 - offset, self.remain); let after = self.remain - size; @@ -618,7 +618,8 @@ impl PrpIter<'_> { // entry which should be present in PRP2 PrpNext::Prp2 } else { - // If the remaining length is larger than the page size, PRP2 points to a list. + // If the remaining length is larger than the page size, + // PRP2 points to a list. // // The first PRP List entry: // - shall be Qword aligned, and @@ -627,9 +628,10 @@ impl PrpIter<'_> { return Err("PRP2 not Qword aligned!"); } - // PRP2 is allowed a non-zero offset into the page, meaning this operation's - // PRP list could start in the middle of the page. For example, idx below could - // be anywhere from 0 to PRP_LIST_MAX: + // PRP2 is allowed a non-zero offset into the page, meaning + // this operation's PRP list could start in the middle of + // the page. For example, idx below could be anywhere from 0 + // to PRP_LIST_MAX: // // | idx // --------------| --- @@ -640,9 +642,10 @@ impl PrpIter<'_> { // PRP List base | PRP_LIST_MAX - 1 // PRP List base | PRP_LIST_MAX // - // Note that lists cannot cross page boundaries. If idx = PRP_LIST_MAX is - // reached, the last entry will point to another list (unless the remaining - // size is satisfied by the end of the list). + // Note that lists cannot cross page boundaries. If idx = + // PRP_LIST_MAX is reached, the last entry will point to + // another list (unless the remaining size is satisfied by + // the end of the list). let base = self.prp2 & (PAGE_MASK as u64); let idx = (self.prp2 & PAGE_OFFSET as u64) / 8; PrpNext::List(base, idx as u16) @@ -673,25 +676,18 @@ impl PrpIter<'_> { if self.remain <= PAGE_SIZE as u64 { (entry, self.remain, PrpNext::Done) + } else if idx != PRP_LIST_MAX { + (entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1)) } else { - if idx != PRP_LIST_MAX { - (entry, PAGE_SIZE as u64, PrpNext::List(base, idx + 1)) - } else { - // Chase the PRP to the next PRP list and read the first entry from it to - // use as the next result. - let next_entry: u64 = - self.mem.read(GuestAddr(entry)).ok_or_else( - || "Unable to read PRP list entry", - )?; - if next_entry & PAGE_OFFSET as u64 != 0 { - return Err("Inappropriate PRP list entry offset"); - } - (next_entry, PAGE_SIZE as u64, PrpNext::List(entry, 1)) - } + // The last PRP in this list chains to another + // (page-aligned) list with the next PRP. + self.next = PrpNext::List(entry, 0); + return self.get_next(); } } PrpNext::Done => { - // prior checks of self.remain should prevent us from ever reaching this + // prior checks of self.remain should prevent us from ever + // reaching this panic!() } }; @@ -830,3 +826,202 @@ impl From for Completion { } } } + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use crate::common::*; + use crate::vmm::mem::{MemCtx, PhysMap}; + + use super::PrpIter; + + const VM_SIZE: usize = 256 * PAGE_SIZE; + const PRP_PER_PAGE: usize = PAGE_SIZE / 8; + + fn setup() -> (PhysMap, Arc) { + let mut pmap = PhysMap::new_test(VM_SIZE); + pmap.add_test_mem("lowmem".to_string(), 0, VM_SIZE) + .expect("lowmem seg creation should succeed"); + + let memctx = pmap.memctx(); + (pmap, memctx) + } + + // Simple helpers to make math below more terse + const fn region(addr: u64, sz: u64) -> GuestRegion { + GuestRegion(GuestAddr(addr), sz as usize) + } + const fn pages(c: u64) -> u64 { + PAGE_SIZE as u64 * c + } + + #[test] + fn test_prp_single() { + let (_pmap, memctx) = setup(); + + // Basic single page + let mut iter = PrpIter::new(pages(1), 0x1000, 0, &memctx); + assert_eq!(iter.next(), Some(region(0x1000, pages(1)))); + assert_eq!(iter.next(), None); + + // Sub-single page + let sub = 0x200; + let mut iter = PrpIter::new(pages(1) - sub, 0x1000, 0, &memctx); + assert_eq!(iter.next(), Some(region(0x1000, pages(1) - sub))); + assert_eq!(iter.next(), None); + + // Sub-single page (with offset) + let mut iter = PrpIter::new(pages(1) - sub, 0x1000 + sub, 0, &memctx); + assert_eq!(iter.next(), Some(region(0x1000 + sub, pages(1) - sub))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_prp_dual() { + let (_pmap, memctx) = setup(); + + // Basic dual page + let mut iter = PrpIter::new(pages(2), 0x1000, 0x2000, &memctx); + assert_eq!(iter.next(), Some(region(0x1000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x2000, pages(1)))); + assert_eq!(iter.next(), None); + + // single page, split by offset + let sub = 0x200; + let mut iter = PrpIter::new(pages(1), 0x1000 + sub, 0x2000, &memctx); + assert_eq!(iter.next(), Some(region(0x1000 + sub, pages(1) - sub))); + assert_eq!(iter.next(), Some(region(0x2000, sub))); + assert_eq!(iter.next(), None); + + // less than single page, split by offset + let sz = pages(1) - 0x200; + let off = 0x400; + let mut iter = PrpIter::new(sz, 0x1000 + off, 0x2000, &memctx); + assert_eq!(iter.next(), Some(region(0x1000 + off, pages(1) - off))); + assert_eq!(iter.next(), Some(region(0x2000, sz - (pages(1) - off)))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_prp_list() { + // Basic triple page (aligned prplist) + let (_pmap, memctx) = setup(); + let listprps: [u64; 2] = [0x2000, 0x3000]; + let listaddr = 0x80000; + memctx.write(GuestAddr(listaddr), &listprps); + let mut iter = PrpIter::new(pages(3), 0x1000, listaddr, &memctx); + assert_eq!(iter.next(), Some(region(0x1000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x2000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x3000, pages(1)))); + assert_eq!(iter.next(), None); + + // Basic triple page (offset prplist) + let (_pmap, memctx) = setup(); + let listprps: [u64; 2] = [0x2000, 0x3000]; + let listaddr = 0x80010; + memctx.write(GuestAddr(listaddr), &listprps); + let mut iter = PrpIter::new(pages(3), 0x1000, listaddr, &memctx); + assert_eq!(iter.next(), Some(region(0x1000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x2000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x3000, pages(1)))); + assert_eq!(iter.next(), None); + + // Offset triple page + let (_pmap, memctx) = setup(); + let listprps: [u64; 3] = [0x2000, 0x3000, 0x4000]; + let listaddr = 0x80000; + let off = 0x200; + memctx.write(GuestAddr(listaddr), &listprps); + let mut iter = PrpIter::new(pages(3), 0x1000 + off, listaddr, &memctx); + assert_eq!(iter.next(), Some(region(0x1000 + off, pages(1) - off))); + assert_eq!(iter.next(), Some(region(0x2000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x3000, pages(1)))); + assert_eq!(iter.next(), Some(region(0x4000, off))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_prp_list_offset_last() { + // List with offset, where last entry covers less than one page + let (_pmap, memctx) = setup(); + let listaddr = 0x80000u64; + let mut prps: Vec = Vec::with_capacity(PRP_PER_PAGE); + let mut bufaddr = 0x2000u64; + for _idx in 0..PRP_PER_PAGE { + prps.push(bufaddr); + bufaddr += pages(1); + } + memctx.write_many(GuestAddr(listaddr), &prps); + let off = 0x200; + let mut iter = PrpIter::new( + pages(PRP_PER_PAGE as u64), + 0x1000 + off, + listaddr, + &memctx, + ); + assert_eq!( + iter.next(), + Some(region(0x1000 + off, pages(1) - off)), + "prp1 entry incorrect" + ); + + let mut bufaddr = 0x2000u64; + for idx in 0..(PRP_PER_PAGE - 1) { + assert_eq!( + iter.next(), + Some(region(bufaddr, pages(1))), + "bad prp at idx: {idx}" + ); + bufaddr += pages(1); + } + // last prplist entry should be the remaining bytes left over from + // offsetting the prp1 entry + assert_eq!(iter.next(), Some(region(bufaddr, off))); + } + + #[test] + fn test_prp_multiple() { + // Basic multiple-page prplist + let (_pmap, memctx) = setup(); + let listaddrs = [0x80000u64, 0x81000u64]; + let mut prps: Vec = Vec::with_capacity(PRP_PER_PAGE); + let mut bufaddr = 0x2000u64; + + let entries_first = PRP_PER_PAGE - 1; + for _idx in 0..entries_first { + prps.push(bufaddr); + bufaddr += pages(1); + } + // Link to the next list page + prps.push(listaddrs[1]); + memctx.write_many(GuestAddr(listaddrs[0]), &prps[..]); + + // populate a few more entries in the next prplist + let entries_second = 4; + for idx in 0..entries_second { + prps[idx] = bufaddr; + bufaddr += pages(1); + } + memctx.write_many(GuestAddr(listaddrs[1]), &prps[..entries_second]); + + let total_entries = 1 + entries_first + entries_second; + let mut iter = PrpIter::new( + pages(total_entries as u64), + 0x1000, + listaddrs[0], + &memctx, + ); + + let mut bufaddr = 0x1000u64; + for idx in 0..total_entries { + assert_eq!( + iter.next(), + Some(region(bufaddr, pages(1))), + "bad prp at idx: {idx}" + ); + bufaddr += pages(1); + } + assert_eq!(iter.next(), None); + } +} diff --git a/lib/propolis/src/vmm/mem.rs b/lib/propolis/src/vmm/mem.rs index d4eca8715..b552625c9 100644 --- a/lib/propolis/src/vmm/mem.rs +++ b/lib/propolis/src/vmm/mem.rs @@ -6,7 +6,7 @@ use std::io::{Error, ErrorKind, Result}; use std::marker::PhantomData; -use std::mem::size_of; +use std::mem::{size_of, size_of_val}; use std::ops::RangeInclusive; use std::os::unix::io::{AsRawFd, RawFd}; use std::ptr::{copy_nonoverlapping, NonNull}; @@ -197,6 +197,12 @@ impl PhysMap { #[cfg(test)] impl PhysMap { + pub(crate) fn new_test(size: usize) -> Self { + let hdl = + VmmHdl::new_test(size).expect("create tempfile backed test hdl"); + Self::new(size, Arc::new(hdl)) + } + /// Create "memory" region on an instance backed with a fake VmmHdl pub(crate) fn add_test_mem( &mut self, @@ -451,14 +457,27 @@ impl<'a> SubMapping<'a> { Ok(self) } - /// Reads a `T` object from the mapping. - pub fn read(&self) -> Result { + /// Emit appropriate error if mapping does not allow writes + fn check_write_access(&self) -> Result<()> { + if !self.prot.contains(Prot::WRITE) { + Err(Error::new(ErrorKind::PermissionDenied, "No write access")) + } else { + Ok(()) + } + } + + /// Emit appropriate error if mapping does not allow reads + fn check_read_access(&self) -> Result<()> { if !self.prot.contains(Prot::READ) { - return Err(Error::new( - ErrorKind::PermissionDenied, - "No read access", - )); + Err(Error::new(ErrorKind::PermissionDenied, "No read access")) + } else { + Ok(()) } + } + + /// Reads a `T` object from the mapping. + pub fn read(&self) -> Result { + self.check_read_access()?; let typed = self.ptr.as_ptr() as *const T; if self.len < std::mem::size_of::() { return Err(Error::new(ErrorKind::InvalidData, "Buffer too small")); @@ -470,29 +489,46 @@ impl<'a> SubMapping<'a> { Ok(unsafe { typed.read_unaligned() }) } - /// Reads a buffer of bytes from the mapping. - pub fn read_bytes(&self, buf: &mut [u8]) -> Result { - if !self.prot.contains(Prot::READ) { + /// Read `values` from the mapping. + pub fn read_many(&self, values: &mut [T]) -> Result<()> { + self.check_read_access()?; + let copy_len = size_of_val(values); + if self.len < copy_len { return Err(Error::new( - ErrorKind::PermissionDenied, - "No read access", + ErrorKind::InvalidInput, + "Value larger than mapping", )); } - let to_copy = usize::min(buf.len(), self.len); - let src = self.ptr.as_ptr(); - let dst = buf.as_mut_ptr(); - // Safety: - // - src must be valid for reads of to_copy * size_of::() bytes. - // - dst must be valid for writes of count * size_of::() bytes. - // - Both src and dst must be properly aligned. - // - The region of memory beginning at src with a size of count * - // size_of::() bytes must not overlap with the region of memory beginning - // at dst with the same size. + // We know that the `values` reference is properly aligned, but that is + // not guaranteed for the source pointer. Cast it down to a u8, which + // will appease those alignment concerns + let src = self.ptr.as_ptr() as *const u8; + let dst = values.as_mut_ptr() as *mut u8; + + // Safety + // - `src` is valid for read for the `copy_len` as checked above + // - `src` is valid for writes for its entire length, since it is from a + // valid mutable reference passed in to us + // - both are aligned for a `u8` copy + // - `dst` cannot be overlapped by `src`, since the former came from a + // valid reference, and references to guest mappings are not allowed unsafe { - copy_nonoverlapping(src, dst, to_copy); + copy_nonoverlapping(src, dst, copy_len); } - Ok(to_copy) + Ok(()) + } + + /// Reads a buffer of bytes from the mapping. + /// + /// If `buf` is larger than the SubMapping, the read will be truncated to + /// length of the SubMapping. + /// + /// Returns the number of bytes read. + pub fn read_bytes(&self, buf: &mut [u8]) -> Result { + let read_len = usize::min(buf.len(), self.len); + self.read_many(&mut buf[..read_len])?; + Ok(read_len) } /// Pread from `file` into the mapping. @@ -502,13 +538,7 @@ impl<'a> SubMapping<'a> { length: usize, offset: i64, ) -> Result { - if !self.prot.contains(Prot::WRITE) { - return Err(Error::new( - ErrorKind::PermissionDenied, - "No write access", - )); - } - + self.check_write_access()?; let to_read = usize::min(length, self.len); let read = unsafe { libc::pread( @@ -526,12 +556,7 @@ impl<'a> SubMapping<'a> { /// Writes `value` into the mapping. pub fn write(&self, value: &T) -> Result<()> { - if !self.prot.contains(Prot::WRITE) { - return Err(Error::new( - ErrorKind::PermissionDenied, - "No write access", - )); - } + self.check_write_access()?; let typed = self.ptr.as_ptr() as *mut T; unsafe { typed.write_unaligned(*value); @@ -539,40 +564,51 @@ impl<'a> SubMapping<'a> { Ok(()) } - /// Writes a buffer of bytes into the mapping. - pub fn write_bytes(&self, buf: &[u8]) -> Result { - if !self.prot.contains(Prot::WRITE) { + /// Writes `values` into the mapping. + pub fn write_many(&self, values: &[T]) -> Result<()> { + self.check_write_access()?; + let copy_len = size_of_val(values); + if self.len < copy_len { return Err(Error::new( - ErrorKind::PermissionDenied, - "No write access", + ErrorKind::InvalidInput, + "Value larger than mapping", )); } - let to_copy = usize::min(buf.len(), self.len); - let src = buf.as_ptr(); - let dst = self.ptr.as_ptr(); - - // Safety: - // - src must be valid for reads of count * size_of::() bytes. - // - dst must be valid for writes of count * size_of::() bytes. - // - Both src and dst must be properly aligned. - // - The region of memory beginning at src with a size of count * - // size_of::() bytes must not overlap with the region of memory beginning - // at dst with the same size. + // We know that the `values` reference is properly aligned, but that is + // not guaranteed for the destination pointer. Cast it down to a u8, + // which will appease those alignment concerns + let src = values.as_ptr() as *const u8; + let dst = self.ptr.as_ptr() as *mut u8; + + // Safety + // - `src` is valid for reads for its entire length, since it is from a + // valid reference passed in to us + // - `dst` is valid for writes for the `copy_len` as checked above + // - both are aligned for a `u8` copy + // - `dst` cannot be overlapped by `src`, since the latter came from a + // valid reference, and references to guest mappings are not allowed unsafe { - copy_nonoverlapping(src, dst, to_copy); + copy_nonoverlapping(src, dst, copy_len); } - Ok(to_copy) + Ok(()) + } + + /// Writes a buffer of bytes into the mapping. + /// + /// If `buf` is larger than the SubMapping, the write will be truncated to + /// length of the SubMapping. + /// + /// Returns the number of bytes read. + pub fn write_bytes(&self, buf: &[u8]) -> Result { + let write_len = usize::min(buf.len(), self.len); + self.write_many(&buf[..write_len])?; + Ok(write_len) } /// Writes a single byte `val` to the mapping, `count` times. pub fn write_byte(&self, val: u8, count: usize) -> Result { - if !self.prot.contains(Prot::WRITE) { - return Err(Error::new( - ErrorKind::PermissionDenied, - "No write access", - )); - } + self.check_write_access()?; let to_copy = usize::min(count, self.len); unsafe { self.ptr.as_ptr().write_bytes(val, to_copy); @@ -587,13 +623,7 @@ impl<'a> SubMapping<'a> { length: usize, offset: i64, ) -> Result { - if !self.prot.contains(Prot::READ) { - return Err(Error::new( - ErrorKind::PermissionDenied, - "No write access", - )); - } - + self.check_read_access()?; let to_write = usize::min(length, self.len); let written = unsafe { libc::pwrite( @@ -822,6 +852,19 @@ impl MemCtx { false } } + /// Write multiple values to guest memory + /// + /// If the memory offset and value(s) size would result in the copy crossing + /// vmm memory segments, this will fail. + pub fn write_many(&self, addr: GuestAddr, val: &[T]) -> bool { + if let Some(mapping) = + self.region_covered(addr, size_of_val(val), Prot::WRITE) + { + mapping.write_many(val).is_ok() + } else { + false + } + } pub fn writable_region(&self, region: &GuestRegion) -> Option { let mapping = self.region_covered(region.0, region.1, Prot::WRITE)?; @@ -1013,6 +1056,15 @@ pub mod tests { assert_eq!(TEST_LEN, mapping.read_bytes(&mut buf).unwrap()); } + #[test] + fn mapping_shortens_write_bytes_beyond_end() { + let (_hdl, base) = test_setup(Prot::RW); + let mapping = SubMapping::new_base_test(base); + + let mut buf: [u8; TEST_LEN + 1] = [0; TEST_LEN + 1]; + assert_eq!(TEST_LEN, mapping.write_bytes(&mut buf).unwrap()); + } + #[test] fn mapping_create_empty() { let (_hdl, base) = test_setup(Prot::READ);