diff --git a/src/vmm/src/devices/virtio/iovec.rs b/src/vmm/src/devices/virtio/iovec.rs index 5a015975df0..674521bf4e3 100644 --- a/src/vmm/src/devices/virtio/iovec.rs +++ b/src/vmm/src/devices/virtio/iovec.rs @@ -367,10 +367,15 @@ impl IoVecBufferMut { /// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns /// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have /// the limit that the length of a buffer is limited by `u32`. - pub(crate) fn len(&self) -> usize { + pub fn len(&self) -> usize { self.len } + /// Returns true if the buffer is empty. + pub fn is_empty(&self) -> bool { + self.len == 0 + } + /// Returns a pointer to the memory keeping the `iovec` structs pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] { self.vecs.as_mut_slice() diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index ed6f36dcb0b..543e4b6e13a 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -6,11 +6,12 @@ // found in the THIRD-PARTY file. 
use std::collections::VecDeque; -use std::mem; +use std::mem::{self}; use std::net::Ipv4Addr; +use std::num::{NonZeroU32, Wrapping}; use std::sync::{Arc, Mutex}; -use libc::EAGAIN; +use libc::{iovec, EAGAIN}; use log::error; use vmm_sys_util::eventfd::EventFd; @@ -19,7 +20,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1; use crate::devices::virtio::gen::virtio_net::{ virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, - VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, + VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, }; use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::iovec::{ @@ -108,7 +109,8 @@ pub struct RxBuffers { // A map of which part of the memory belongs to which `DescriptorChain` object pub parsed_descriptors: VecDeque, // Buffers that we have used and they are ready to be given back to the guest. - pub deferred_descriptor: Option, + pub used_descriptors: u16, + pub used_bytes: u32, } impl RxBuffers { @@ -118,7 +120,8 @@ impl RxBuffers { min_buffer_size: 0, iovec: IoVecBufferMut::new()?, parsed_descriptors: VecDeque::with_capacity(FIRECRACKER_MAX_QUEUE_SIZE.into()), - deferred_descriptor: None, + used_descriptors: 0, + used_bytes: 0, }) } @@ -141,18 +144,12 @@ impl RxBuffers { Ok(()) } - /// Returns the number of available `iovec` objects. + /// Returns the total size of available space in the buffer. #[inline(always)] - fn len(&self) -> usize { + fn capacity(&self) -> usize { self.iovec.len() } - /// Returns `true` if there aren't any available `iovec` objects. - #[inline(always)] - fn is_empty(&self) -> bool { - self.len() == 0 - } - /// Mark the first `size` bytes of available memory as used. 
/// /// # Safety: @@ -160,18 +157,37 @@ impl RxBuffers { /// * The `RxBuffers` should include at least one parsed `DescriptorChain`. /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored /// in the `RxBuffers`. - unsafe fn mark_used(&mut self, size: u32) -> ParsedDescriptorChain { - // Since we were able to write a frame in guest memory, we should have at least one - // descriptor chain here. If not, we have a bug, so fail fast, since the device is - // fundamentally broken. - let mut parsed_dc = self.parsed_descriptors.pop_front().expect( - "net: internal bug. Mismatch between written frame size and available descriptors", - ); + unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) { + self.used_bytes = bytes_written; + + let mut used_heads: u16 = 0; + for parsed_dc in self.parsed_descriptors.iter() { + let used_bytes = bytes_written.min(parsed_dc.length); + // Safe because we know head_index isn't out of bounds + rx_queue + .write_used_element( + (rx_queue.next_used + Wrapping(used_heads)).0, + parsed_dc.head_index, + used_bytes, + ) + .unwrap(); + bytes_written -= used_bytes; + used_heads += 1; + self.used_descriptors += 1; + + if bytes_written == 0 { + break; + } + } - self.header_set_num_buffers(1); - self.iovec.drop_descriptor_chain(&parsed_dc); - parsed_dc.length = size; - parsed_dc + self.header_set_num_buffers(used_heads); + for _ in 0..used_heads { + let parsed_dc = self + .parsed_descriptors + .pop_front() + .expect("This should never happen if write to the buffer succeded."); + self.iovec.drop_descriptor_chain(&parsed_dc); + } } /// Write the number of descriptors used in VirtIO header @@ -190,14 +206,24 @@ impl RxBuffers { /// This will let the guest know that about all the `DescriptorChain` object that has been /// used to receive a frame from the TAP. - fn finish_frame(&mut self, dc: &ParsedDescriptorChain, rx_queue: &mut Queue) { - // It is fine to `.unrap()` here. 
The only reason why `add_used` can fail is if the - // `head_index` is not a valid descriptor id. `head_index` here is a valid - // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which - // checks for its validity. In other words, if this unwrap() fails there's a bug in our - // emulation logic which, most likely, we can't recover from. So, let's crash here - // instead of logging an error and continuing. - rx_queue.add_used(dc.head_index, dc.length).unwrap(); + fn finish_frame(&mut self, rx_queue: &mut Queue) { + rx_queue.advance_used_ring(self.used_descriptors); + self.used_descriptors = 0; + self.used_bytes = 0; + } + + /// Return a slice of iovecs for the first slice in the buffer. + /// + /// # Safety + /// Buffer needs to have at least one descriptor chain parsed. + unsafe fn single_chain_slice_mut(&mut self) -> &mut [iovec] { + let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize; + &mut self.iovec.as_iovec_mut_slice()[..nr_iovecs] + } + + /// Return a slice of iovecs for all descriptor chains in the buffer. + fn all_chains_slice_mut(&mut self) -> &mut [iovec] { + self.iovec.as_iovec_mut_slice() } } @@ -260,6 +286,7 @@ impl Net { | 1 << VIRTIO_NET_F_HOST_TSO6 | 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_NET_F_MRG_RXBUF | 1 << VIRTIO_RING_F_EVENT_IDX; let mut config_space = ConfigSpace::default(); @@ -407,27 +434,39 @@ impl Net { // Attempts to copy a single frame into the guest if there is enough // rate limiting budget. // Returns true on successful frame delivery. 
- fn rate_limited_rx_single_frame(&mut self, dc: &ParsedDescriptorChain) -> bool { + fn rate_limited_rx_single_frame(&mut self, frame_size: u32) -> bool { let rx_queue = &mut self.queues[RX_INDEX]; - if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, dc.length as u64) { + if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, frame_size as u64) { self.metrics.rx_rate_limiter_throttled.inc(); return false; } - self.rx_buffer.finish_frame(dc, rx_queue); + self.rx_buffer.finish_frame(rx_queue); true } /// Returns the minimum size of buffer we expect the guest to provide us depending on the /// features we have negotiated with it fn minimum_rx_buffer_size(&self) -> u32 { - if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64) - || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64) - || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64) - { - 65562 + if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) { + if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64) + || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64) + || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64) + { + // Max buffer size is 65562 + #[allow(clippy::cast_possible_truncation)] + { + MAX_BUFFER_SIZE as u32 + } + } else { + 1526 + } } else { - 1526 + // header is 12 bytes long + #[allow(clippy::cast_possible_truncation)] + { + vnet_hdr_len() as u32 + } } } @@ -442,6 +481,9 @@ impl Net { if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } { self.metrics.rx_fails.inc(); error!("net: Could not parse an RX descriptor: {err}"); + // Notify queue about ready frames. We need this + // to bring queue into up to date state. + self.rx_buffer.finish_frame(queue); // Try to add the bad descriptor to the used ring. if let Err(err) = queue.add_used(index, 0) { error!( @@ -528,16 +570,15 @@ impl Net { } // We currently prioritize packets from the MMDS over regular network packets. 
- fn read_from_mmds_or_tap(&mut self) -> Result, NetError> { - // If we don't have any buffers available try to parse more from the RX queue. There might - // be some buffers we didn't get the chance to process, because we got to handle the TAP - // event before the RX queue event. - if self.rx_buffer.is_empty() { + fn read_from_mmds_or_tap(&mut self) -> Result, NetError> { + // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as + // this is the max size of 1 packet. + if self.rx_buffer.capacity() < MAX_BUFFER_SIZE { self.parse_rx_descriptors(); - // If after parsing the RX queue we still don't have any buffers stop processing RX + // If after parsing the RX queue we still don't have enough capacity, stop processing RX // frames. - if self.rx_buffer.is_empty() { + if self.rx_buffer.capacity() < MAX_BUFFER_SIZE { return Ok(None); } } @@ -553,16 +594,22 @@ impl Net { self.rx_buffer .iovec .write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?; + + // SAFETY: + // * len is never 0 + // * Maximum size of the packet is 65562 which fits into u32 + #[allow(clippy::cast_possible_truncation)] + let len = unsafe { NonZeroU32::new_unchecked((vnet_hdr_len() + len) as u32) }; + // SAFETY: This is safe: // * We checked that `rx_buffer` includes at least one `DescriptorChain` // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects // are at least that big. - let dc = unsafe { + unsafe { self.rx_buffer - .mark_used((vnet_hdr_len() + len).try_into().unwrap()) - }; - - return Ok(Some(dc)); + .mark_used(len.get(), &mut self.queues[RX_INDEX]); + } + return Ok(Some(len)); } } @@ -570,12 +617,21 @@ impl Net { // DescriptorChain parsed in it. 
let len = unsafe { self.read_tap().map_err(NetError::IO) }?; + // SAFETY: + // * len is never 0 + // * Maximum size of the packet is 65562 which fits into u32 + #[allow(clippy::cast_possible_truncation)] + let len = unsafe { NonZeroU32::new_unchecked(len as u32) }; + // SAFETY: This is safe, // * `rx_buffer` has at least one `DescriptorChain` // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more // bytes than its capacity. - let dc = unsafe { self.rx_buffer.mark_used(len.try_into().unwrap()) }; - Ok(Some(dc)) + unsafe { + self.rx_buffer + .mark_used(len.get(), &mut self.queues[RX_INDEX]); + } + Ok(Some(len)) } /// Read as many frames as possible. @@ -586,12 +642,11 @@ impl Net { self.metrics.no_rx_avail_buffer.inc(); break; } - Ok(Some(dc)) => { + Ok(Some(bytes)) => { self.metrics.rx_count.inc(); - self.metrics.rx_bytes_count.add(dc.length as u64); + self.metrics.rx_bytes_count.add(bytes.get() as u64); self.metrics.rx_packets_count.inc(); - if !self.rate_limited_rx_single_frame(&dc) { - self.rx_buffer.deferred_descriptor = Some(dc); + if !self.rate_limited_rx_single_frame(bytes.get()) { break; } } @@ -619,11 +674,10 @@ impl Net { fn resume_rx(&mut self) -> Result<(), DeviceError> { // First try to handle any deferred frame - if let Some(deferred_descriptor) = self.rx_buffer.deferred_descriptor.take() { + if self.rx_buffer.used_bytes != 0 { // If can't finish sending this frame, re-set it as deferred and return; we can't // process any more frames from the TAP. 
- if !self.rate_limited_rx_single_frame(&deferred_descriptor) { - self.rx_buffer.deferred_descriptor = Some(deferred_descriptor); + if !self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes) { return Ok(()); } } @@ -689,7 +743,7 @@ impl Net { &self.metrics, ) .unwrap_or(false); - if frame_consumed_by_mmds && self.rx_buffer.deferred_descriptor.is_none() { + if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 { // MMDS consumed this frame/request, let's also try to process the response. process_rx_for_mmds = true; } @@ -774,9 +828,13 @@ impl Net { /// /// `self.rx_buffer` needs to have at least one descriptor chain parsed pub unsafe fn read_tap(&mut self) -> std::io::Result { - let nr_iovecs = self.rx_buffer.parsed_descriptors[0].nr_iovecs as usize; - self.tap - .read_iovec(&mut self.rx_buffer.iovec.as_iovec_mut_slice()[..nr_iovecs]) + let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) { + self.rx_buffer.all_chains_slice_mut() + } else { + // SAFETY: we only call this if `rx_buffer` is not empty. 
+ unsafe { self.rx_buffer.single_chain_slice_mut() } + }; + self.tap.read_iovec(slice) } fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result { @@ -978,6 +1036,7 @@ impl VirtioDevice for Net { #[cfg(test)] #[macro_use] +#[allow(clippy::cast_possible_truncation)] pub mod tests { use std::net::Ipv4Addr; use std::os::fd::AsRawFd; @@ -985,6 +1044,8 @@ pub mod tests { use std::time::Duration; use std::{mem, thread}; + use vm_memory::GuestAddress; + use super::*; use crate::check_metric_after_block; use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; @@ -999,6 +1060,7 @@ pub mod tests { }; use crate::devices::virtio::net::NET_QUEUE_SIZES; use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE; + use crate::devices::virtio::test_utils::VirtQueue; use crate::dumbo::pdu::arp::{EthIPv4ArpFrame, ETH_IPV4_FRAME_LEN}; use crate::dumbo::pdu::ethernet::ETHERTYPE_ARP; use crate::dumbo::EthernetFrame; @@ -1066,6 +1128,7 @@ pub mod tests { | 1 << VIRTIO_NET_F_HOST_TSO6 | 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_NET_F_MRG_RXBUF | 1 << VIRTIO_RING_F_EVENT_IDX; assert_eq!( @@ -1187,10 +1250,7 @@ pub mod tests { assert_eq!(th.rxq.used.idx.get(), 0); } - #[test] - fn test_rx_read_only_descriptor() { - let mem = single_region_mem(2 * MAX_BUFFER_SIZE); - let mut th = TestHelper::get_default(&mem); + fn rx_read_only_descriptor(mut th: TestHelper) { th.activate_net(); th.add_desc_chain( @@ -1214,12 +1274,25 @@ pub mod tests { } #[test] - fn test_rx_short_writable_descriptor() { + fn test_rx_read_only_descriptor() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_read_only_descriptor(th); + } + + #[test] + fn test_rx_read_only_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_read_only_descriptor(th); + } + + 
fn rx_short_descriptor(mut th: TestHelper) { th.activate_net(); - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 100, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain(NetQueue::Rx, 0, &[(0, 10, VIRTQ_DESC_F_WRITE)]); let mut frame = th.check_rx_discarded_buffer(1000); th.rxq.check_used_elem(0, 0, 0); @@ -1228,9 +1301,22 @@ pub mod tests { } #[test] - fn test_rx_partial_write() { + fn test_rx_short_descriptor() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_short_descriptor(th); + } + + #[test] + fn test_rx_short_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_short_descriptor(th); + } + + fn rx_invalid_descriptor(mut th: TestHelper) { th.activate_net(); // The descriptor chain is created so that the last descriptor doesn't fit in the @@ -1253,9 +1339,22 @@ pub mod tests { } #[test] - fn test_rx_retry() { + fn test_rx_invalid_descriptor() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_invalid_descriptor(th); + } + + #[test] + fn test_rx_invalid_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_invalid_descriptor(th); + } + + fn rx_retry(mut th: TestHelper) { th.activate_net(); // Add invalid descriptor chain - read only descriptor. @@ -1269,7 +1368,7 @@ pub mod tests { ], ); // Add invalid descriptor chain - too short. - th.add_desc_chain(NetQueue::Rx, 1200, &[(3, 100, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain(NetQueue::Rx, 1200, &[(3, 10, VIRTQ_DESC_F_WRITE)]); // Add invalid descriptor chain - invalid memory offset. 
th.add_desc_chain( NetQueue::Rx, @@ -1279,7 +1378,11 @@ pub mod tests { // Add valid descriptor chain. TestHelper does not negotiate any feature offloading so the // buffers need to be at least 1526 bytes long. - th.add_desc_chain(NetQueue::Rx, 1300, &[(5, 1526, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain( + NetQueue::Rx, + 1300, + &[(5, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); // Inject frame to tap and run epoll. let mut frame = inject_tap_tx_frame(&th.net(), 1000); @@ -1297,7 +1400,7 @@ pub mod tests { th.rxq.check_used_elem(1, 3, 0); th.rxq.check_used_elem(2, 4, 0); // Check that the frame wasn't deferred. - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); + assert!(th.net().rx_buffer.used_descriptors == 0); // Check that the frame has been written successfully to the valid Rx descriptor chain. th.rxq .check_used_elem(3, 5, frame.len().try_into().unwrap()); @@ -1306,9 +1409,22 @@ pub mod tests { } #[test] - fn test_rx_complex_desc_chain() { + fn test_rx_retry() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_retry(th); + } + + #[test] + fn test_rx_retry_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_retry(th); + } + + fn rx_complex_desc_chain(mut th: TestHelper) { th.activate_net(); // Create a valid Rx avail descriptor chain with multiple descriptors. @@ -1320,7 +1436,7 @@ pub mod tests { &[ (3, 100, VIRTQ_DESC_F_WRITE), (5, 50, VIRTQ_DESC_F_WRITE), - (11, 4096, VIRTQ_DESC_F_WRITE), + (11, MAX_BUFFER_SIZE as u32 - 100 - 50, VIRTQ_DESC_F_WRITE), ], ); // Inject frame to tap and run epoll. @@ -1332,7 +1448,7 @@ pub mod tests { ); // Check that the frame wasn't deferred. - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); + assert!(th.net().rx_buffer.used_descriptors == 0); // Check that the used queue has advanced. 
assert_eq!(th.rxq.used.idx.get(), 1); assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); @@ -1346,9 +1462,22 @@ pub mod tests { } #[test] - fn test_rx_multiple_frames() { + fn test_rx_complex_desc_chain() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_complex_desc_chain(th); + } + + #[test] + fn test_rx_complex_desc_chain_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_complex_desc_chain(th); + } + + fn rx_multiple_frames(mut th: TestHelper) { th.activate_net(); // Create 2 valid Rx avail descriptor chains. Each one has enough space to fit the @@ -1359,16 +1488,17 @@ pub mod tests { &[ (0, 500, VIRTQ_DESC_F_WRITE), (1, 500, VIRTQ_DESC_F_WRITE), - (2, 526, VIRTQ_DESC_F_WRITE), + (2, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE), ], ); + // Second chain needs at least MAX_BUFFER_SIZE offset th.add_desc_chain( NetQueue::Rx, - 2000, + MAX_BUFFER_SIZE as u64 + 1000, &[ (3, 500, VIRTQ_DESC_F_WRITE), (4, 500, VIRTQ_DESC_F_WRITE), - (5, 526, VIRTQ_DESC_F_WRITE), + (5, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE), ], ); // Inject 2 frames to tap and run epoll. @@ -1381,7 +1511,7 @@ pub mod tests { ); // Check that the frames weren't deferred. - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); + assert!(th.net().rx_buffer.used_bytes == 0); // Check that the used queue has advanced. 
assert_eq!(th.rxq.used.idx.get(), 2); assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); @@ -1391,14 +1521,30 @@ pub mod tests { .check_used_elem(0, 0, frame_1.len().try_into().unwrap()); th.rxq.dtable[0].check_data(&frame_1); th.rxq.dtable[1].check_data(&[0; 500]); - th.rxq.dtable[2].check_data(&[0; 526]); + th.rxq.dtable[2].check_data(&[0; MAX_BUFFER_SIZE - 1000]); // Check that the 2nd frame was written successfully to the 2nd Rx descriptor chain. header_set_num_buffers(frame_2.as_mut_slice(), 1); th.rxq .check_used_elem(1, 3, frame_2.len().try_into().unwrap()); th.rxq.dtable[3].check_data(&frame_2); th.rxq.dtable[4].check_data(&[0; 500]); - th.rxq.dtable[2].check_data(&[0; 526]); + th.rxq.dtable[5].check_data(&[0; MAX_BUFFER_SIZE - 1000]); + } + + #[test] + fn test_rx_multiple_frames() { + let mem = single_region_mem(3 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_multiple_frames(th); + } + + #[test] + fn test_rx_multiple_frames_mrg() { + let mem = single_region_mem(3 * MAX_BUFFER_SIZE); + let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_multiple_frames(th); } #[test] @@ -1685,9 +1831,13 @@ pub mod tests { fn test_mmds_detour_and_injection() { let mut net = default_net(); + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let rxq = VirtQueue::new(GuestAddress(0), &mem, 16); + net.queues[RX_INDEX] = rxq.create_queue(); + // Inject a fake buffer in the devices buffers, otherwise we won't be able to receive the // MMDS frame. One iovec will be just fine. - let mut fake_buffer = vec![0u8; 1024]; + let mut fake_buffer = vec![0u8; MAX_BUFFER_SIZE]; let iov_buffer = IoVecBufferMut::from(fake_buffer.as_mut_slice()); net.rx_buffer.iovec = iov_buffer; net.rx_buffer @@ -1815,11 +1965,8 @@ pub mod tests { unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) }; // The RX queue is empty and there is a deferred frame. 
- th.net().rx_buffer.deferred_descriptor = Some(ParsedDescriptorChain { - head_index: 1, - length: 100, - nr_iovecs: 1, - }); + th.net().rx_buffer.used_descriptors = 1; + th.net().rx_buffer.used_bytes = 100; check_metric_after_block!( th.net().metrics.no_rx_avail_buffer, 1, @@ -1829,9 +1976,14 @@ pub mod tests { // We need to set this here to false, otherwise the device will try to // handle a deferred frame, it will fail and will never try to read from // the tap. - th.net().rx_buffer.deferred_descriptor = None; + th.net().rx_buffer.used_descriptors = 0; + th.net().rx_buffer.used_bytes = 0; - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); check_metric_after_block!( th.net().metrics.tap_read_fails, 1, @@ -1951,8 +2103,12 @@ pub mod tests { let mut rl = RateLimiter::new(1000, 0, 500, 0, 0, 0).unwrap(); // set up RX - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); + assert!(th.net().rx_buffer.used_descriptors == 0); + th.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); let mut frame = inject_tap_tx_frame(&th.net(), 1000); @@ -1971,7 +2127,7 @@ pub mod tests { // assert that limiter is blocked assert!(th.net().rx_rate_limiter.is_blocked()); assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 1); - assert!(th.net().rx_buffer.deferred_descriptor.is_some()); + assert!(th.net().rx_buffer.used_descriptors != 0); // assert that no operation actually completed (limiter blocked it) assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); // make sure the data is still queued for processing @@ -2070,8 +2226,12 @@ pub mod tests { let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 500).unwrap(); // set up RX - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, 
VIRTQ_DESC_F_WRITE)]); + assert!(th.net().rx_buffer.used_descriptors == 0); + th.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); let mut frame = inject_tap_tx_frame(&th.net(), 1234); // use up the initial budget @@ -2092,7 +2252,7 @@ pub mod tests { // assert that limiter is blocked assert!(th.net().rx_rate_limiter.is_blocked()); assert!(th.net().metrics.rx_rate_limiter_throttled.count() >= 1); - assert!(th.net().rx_buffer.deferred_descriptor.is_some()); + assert!(th.net().rx_buffer.used_descriptors != 0); // assert that no operation actually completed (limiter blocked it) assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); // make sure the data is still queued for processing diff --git a/src/vmm/src/devices/virtio/net/persist.rs b/src/vmm/src/devices/virtio/net/persist.rs index c7918f07ab3..41d65c1fa39 100644 --- a/src/vmm/src/devices/virtio/net/persist.rs +++ b/src/vmm/src/devices/virtio/net/persist.rs @@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize}; use super::device::{Net, RxBuffers}; use super::{TapError, NET_NUM_QUEUES, RX_INDEX}; use crate::devices::virtio::device::DeviceState; -use crate::devices::virtio::iovec::ParsedDescriptorChain; use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::devices::virtio::TYPE_NET; @@ -37,14 +36,15 @@ pub struct NetConfigSpaceState { pub struct RxBufferState { // Number of iovecs we have parsed from the guest parsed_descriptor_chains_nr: u16, - deferred_descriptor: Option, + // Number of deferred_descriptors + deferred_descriptors: u16, } impl RxBufferState { fn from_rx_buffers(rx_buffer: &RxBuffers) -> Self { RxBufferState { parsed_descriptor_chains_nr: rx_buffer.parsed_descriptors.len().try_into().unwrap(), - deferred_descriptor: rx_buffer.deferred_descriptor.clone(), + deferred_descriptors: rx_buffer.used_descriptors, } } } @@ -162,9 +162,7 @@ 
impl Persist<'_> for Net { // rolling back `next_avail` in the RX queue and call `parse_rx_descriptors`. net.queues[RX_INDEX].next_avail -= state.rx_buffers_state.parsed_descriptor_chains_nr; net.parse_rx_descriptors(); - net.rx_buffer - .deferred_descriptor - .clone_from(&state.rx_buffers_state.deferred_descriptor); + net.rx_buffer.used_descriptors = state.rx_buffers_state.deferred_descriptors; } Ok(net) diff --git a/src/vmm/src/devices/virtio/net/test_utils.rs b/src/vmm/src/devices/virtio/net/test_utils.rs index eb1c6f6e883..004bfc56682 100644 --- a/src/vmm/src/devices/virtio/net/test_utils.rs +++ b/src/vmm/src/devices/virtio/net/test_utils.rs @@ -295,8 +295,9 @@ pub fn assign_queues(net: &mut Net, rxq: Queue, txq: Queue) { } #[cfg(test)] +#[allow(clippy::cast_possible_truncation)] +#[allow(clippy::undocumented_unsafe_blocks)] pub mod test { - #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::ffi::OsStrExt; use std::sync::{Arc, Mutex, MutexGuard}; use std::{cmp, fmt}; @@ -310,7 +311,7 @@ pub mod test { use crate::devices::virtio::net::test_utils::{ assign_queues, default_net, inject_tap_tx_frame, NetEvent, NetQueue, }; - use crate::devices::virtio::net::{Net, RX_INDEX, TX_INDEX}; + use crate::devices::virtio::net::{Net, MAX_BUFFER_SIZE, RX_INDEX, TX_INDEX}; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, VirtqDesc}; use crate::logger::IncMetric; @@ -453,9 +454,12 @@ pub mod test { /// is eventually received by the guest pub fn check_rx_queue_resume(&mut self, expected_frame: &[u8]) { let used_idx = self.rxq.used.idx.get(); - // Add a valid Rx avail descriptor chain and run epoll. We do not negotiate any feature - // offloading so the buffers need to be at least 1526 bytes long. - self.add_desc_chain(NetQueue::Rx, 0, &[(0, 1526, VIRTQ_DESC_F_WRITE)]); + // Add a valid Rx avail descriptor chain and run epoll. 
+ self.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); check_metric_after_block!( self.net().metrics.rx_packets_count, 1, diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs index 6a73816947a..bd7ea151706 100644 --- a/src/vmm/src/devices/virtio/rng/device.rs +++ b/src/vmm/src/devices/virtio/rng/device.rs @@ -108,7 +108,7 @@ impl Entropy { fn handle_one(&self, iovec: &mut IoVecBufferMut) -> Result { // If guest provided us with an empty buffer just return directly - if iovec.len() == 0 { + if iovec.is_empty() { return Ok(0); }