diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1f52fdad063..7486f976fbe 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -464,6 +464,7 @@ pub fn build_microvm_from_snapshot( ) -> Result>, BuildMicrovmFromSnapshotError> { // Build Vmm. debug!("event_start: build microvm from snapshot"); + let restored_with_uffd = uffd.is_some(); let (mut vmm, mut vcpus) = create_vmm_and_vcpus( instance_info, event_manager, @@ -518,6 +519,7 @@ pub fn build_microvm_from_snapshot( resource_allocator: &mut vmm.resource_allocator, vm_resources, instance_id: &instance_info.id, + restored_with_uffd, }; vmm.mmio_device_manager = diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 5773fa0ba09..3522b1e24e7 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -214,6 +214,7 @@ pub struct MMIODevManagerConstructorArgs<'a> { pub resource_allocator: &'a mut ResourceAllocator, pub vm_resources: &'a mut VmResources, pub instance_id: &'a str, + pub restored_with_uffd: bool, } impl fmt::Debug for MMIODevManagerConstructorArgs<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -224,6 +225,7 @@ impl fmt::Debug for MMIODevManagerConstructorArgs<'_> { .field("for_each_restored_device", &"?") .field("vm_resources", &self.vm_resources) .field("instance_id", &self.instance_id) + .field("restored_with_uffd", &self.restored_with_uffd) .finish() } } @@ -512,7 +514,10 @@ impl<'a> Persist<'a> for MMIODeviceManager { if let Some(balloon_state) = &state.balloon_device { let device = Arc::new(Mutex::new(Balloon::restore( - BalloonConstructorArgs { mem: mem.clone() }, + BalloonConstructorArgs { + mem: mem.clone(), + restored_with_uffd: constructor_args.restored_with_uffd, + }, &balloon_state.device_state, )?)); diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 
697928ae9c6..f4b82445b84 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -165,6 +165,7 @@ pub struct Balloon { // Implementation specific fields. pub(crate) restored: bool, + pub(crate) restored_with_uffd: bool, pub(crate) stats_polling_interval_s: u16, pub(crate) stats_timer: TimerFd, // The index of the previous stats descriptor is saved because @@ -190,6 +191,7 @@ impl fmt::Debug for Balloon { .field("device_state", &self.device_state) .field("irq_trigger", &self.irq_trigger) .field("restored", &self.restored) + .field("restored_with_uffd", &self.restored_with_uffd) .field("stats_polling_interval_s", &self.stats_polling_interval_s) .field("stats_desc_index", &self.stats_desc_index) .field("latest_stats", &self.latest_stats) @@ -205,6 +207,7 @@ impl Balloon { deflate_on_oom: bool, stats_polling_interval_s: u16, restored: bool, + restored_with_uffd: bool, ) -> Result { let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; @@ -246,6 +249,7 @@ impl Balloon { device_state: DeviceState::Inactive, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, restored, + restored_with_uffd, stats_polling_interval_s, stats_timer, stats_desc_index: None, @@ -356,6 +360,7 @@ impl Balloon { mem, (guest_addr, u64::from(range_len) << VIRTIO_BALLOON_PFN_SHIFT), self.restored, + self.restored_with_uffd, ) { error!("Error removing memory range: {:?}", err); } diff --git a/src/vmm/src/devices/virtio/balloon/persist.rs b/src/vmm/src/devices/virtio/balloon/persist.rs index 4e768ddd2e2..4634d5db18f 100644 --- a/src/vmm/src/devices/virtio/balloon/persist.rs +++ b/src/vmm/src/devices/virtio/balloon/persist.rs @@ -95,6 +95,7 @@ pub struct BalloonState { pub struct BalloonConstructorArgs { /// Pointer to guest memory. 
pub mem: GuestMemoryMmap, + pub restored_with_uffd: bool, } impl Persist<'_> for Balloon { @@ -121,7 +122,13 @@ impl Persist<'_> for Balloon { ) -> Result { // We can safely create the balloon with arbitrary flags and // num_pages because we will overwrite them after. - let mut balloon = Balloon::new(0, false, state.stats_polling_interval_s, true)?; + let mut balloon = Balloon::new( + 0, + false, + state.stats_polling_interval_s, + true, + constructor_args.restored_with_uffd, + )?; let mut num_queues = BALLOON_NUM_QUEUES; // As per the virtio 1.1 specification, the statistics queue @@ -186,7 +193,7 @@ mod tests { let mut mem = vec![0; 4096]; // Create and save the balloon device. - let balloon = Balloon::new(0x42, false, 2, false).unwrap(); + let balloon = Balloon::new(0x42, false, 2, false, false).unwrap(); Snapshot::serialize(&mut mem.as_mut_slice(), &balloon.save()).unwrap(); diff --git a/src/vmm/src/devices/virtio/balloon/util.rs b/src/vmm/src/devices/virtio/balloon/util.rs index f8cf7aa2000..2c2b0a2ef89 100644 --- a/src/vmm/src/devices/virtio/balloon/util.rs +++ b/src/vmm/src/devices/virtio/balloon/util.rs @@ -69,6 +69,7 @@ pub(crate) fn remove_range( guest_memory: &GuestMemoryMmap, range: (GuestAddress, u64), restored: bool, + uffd: bool, ) -> Result<(), RemoveRegionError> { let (guest_address, range_len) = range; @@ -83,7 +84,11 @@ pub(crate) fn remove_range( // Mmap a new anonymous region over the present one in order to create a hole. // This workaround is (only) needed after resuming from a snapshot because the guest memory // is mmaped from file as private and there is no `madvise` flag that works for this case. - if restored { + // + // Do not apply when using UFFD, or the memory region will be unregistered from UFFD + // and it will no longer receive remove events or subsequent page faults for that memory + // range, making it impossible for the UFFD handler to track removed pages. 
+ if restored && !uffd { // SAFETY: The address and length are known to be valid. let ret = unsafe { libc::mmap( diff --git a/src/vmm/src/vmm_config/balloon.rs b/src/vmm/src/vmm_config/balloon.rs index 5b6f25e8662..c917dba5783 100644 --- a/src/vmm/src/vmm_config/balloon.rs +++ b/src/vmm/src/vmm_config/balloon.rs @@ -99,6 +99,9 @@ impl BalloonBuilder { // `restored` flag is false because this code path // is never called by snapshot restore functionality. false, + // `uffd` flag is false because uffd is only used + // with snapshot restores, which never hits this code path. + false, )?))); Ok(())