From 3af85e728fd768210407a0b773d5a9b60997f83a Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 12 Nov 2024 20:26:31 +0000 Subject: [PATCH 1/3] server: initialize migration targets using their sources' specs Tweak the way VMs are initialized during live migration --- bin/propolis-server/src/lib/migrate/compat.rs | 938 ------------------ .../src/lib/migrate/destination.rs | 60 +- bin/propolis-server/src/lib/migrate/mod.rs | 1 - .../src/lib/migrate/preamble.rs | 74 +- bin/propolis-server/src/lib/spec/mod.rs | 11 - bin/propolis-server/src/lib/vm/ensure.rs | 293 +++--- bin/propolis-server/src/lib/vm/mod.rs | 23 +- .../src/lib/vm/state_driver.rs | 31 +- phd-tests/tests/src/migrate.rs | 50 - 9 files changed, 298 insertions(+), 1183 deletions(-) delete mode 100644 bin/propolis-server/src/lib/migrate/compat.rs diff --git a/bin/propolis-server/src/lib/migrate/compat.rs b/bin/propolis-server/src/lib/migrate/compat.rs deleted file mode 100644 index a2d7fbfe8..000000000 --- a/bin/propolis-server/src/lib/migrate/compat.rs +++ /dev/null @@ -1,938 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Associated functions for the [`crate::spec::Spec`] type that determine -//! whether two specs describe migration-compatible VMs. - -use std::collections::HashMap; - -use crate::spec::{self, SerialPortDevice}; - -use cpuid_utils::CpuidVendor; -use propolis_api_types::instance_spec::{ - components::{ - board::Chipset, - devices::{PciPciBridge, SerialPortNumber}, - }, - PciPath, -}; -use thiserror::Error; - -trait CompatCheck { - type Error; - - fn is_compatible_with(&self, other: &Self) -> Result<(), Self::Error>; -} - -#[derive(Debug, Error)] -pub enum CompatibilityError { - #[error(transparent)] - Board(#[from] BoardIncompatibility), - - #[error(transparent)] - Pvpanic(#[from] PvpanicIncompatibility), - - #[error("collection {0} incompatible")] - Collection(String, #[source] CollectionIncompatibility), - - #[cfg(feature = "falcon")] - #[error("can't migrate instances containing softnpu devices")] - SoftNpu, -} - -#[derive(Debug, Error)] -pub enum BoardIncompatibility { - #[error("boards have different CPU counts (self: {this}, other: {other})")] - CpuCount { this: u8, other: u8 }, - - #[error( - "boards have different memory sizes (self: {this}, other: {other})" - )] - MemorySize { this: u64, other: u64 }, - - #[error( - "chipsets have different PCIe settings (self: {this}, other: {other})" - )] - PcieEnabled { this: bool, other: bool }, - - #[error(transparent)] - Cpuid(#[from] CpuidMismatch), -} - -#[derive(Debug, Error)] -pub enum CpuidMismatch { - #[error("CPUID is explicit in one spec but not the other (self: {this}, other: {other}")] - Explicitness { this: bool, other: bool }, - - #[error( - "CPUID sets have different CPU vendors (self: {this}, other: {other})" - )] - Vendor { this: CpuidVendor, other: CpuidVendor }, - - #[error(transparent)] - LeavesOrValues(#[from] cpuid_utils::CpuidSetMismatch), -} - -#[derive(Debug, Error)] -pub enum DiskIncompatibility { - #[error( - "disks have different device interfaces (self: {this}, other: {other})" - )] - Interface { this: &'static str, other: &'static str }, - - #[error("disks have different PCI paths (self: {this}, other: {other})")] - PciPath { this: PciPath, other: PciPath }, - - #[error( - "disks have different backend names (self: {this:?}, other: {other:?})" - )] - BackendName { this: String, other: String }, - - #[error( - "disks have different backend kinds (self: {this}, other: {other})" - )] - BackendKind { this: &'static str, other: &'static str }, - - #[error( - "disks have different read-only settings (self: {this}, other: {other})" - )] - ReadOnly { this: bool, other: bool }, -} - -#[derive(Debug, Error)] -pub enum NicIncompatibility { - #[error("NICs have different PCI paths (self: {this}, other: {other})")] - PciPath { this: PciPath, other: PciPath }, - - #[error( - "NICs have different backend names (self: {this}, other: {other})" - )] - BackendName { this: String, other: String }, -} - -#[derive(Debug, Error)] -pub enum SerialPortIncompatibility { - #[error("ports have different numbers (self: {this:?}, other: {other:?})")] - Number { this: SerialPortNumber, other: SerialPortNumber }, - - #[error("ports have different devices (self: {this}, other: {other})")] - Device { this: SerialPortDevice, other: SerialPortDevice }, -} - -#[derive(Debug, Error)] -pub enum BridgeIncompatibility { - #[error("bridges have different PCI paths (self: {this}, other: {other})")] - PciPath { this: PciPath, other: PciPath }, - - #[error("bridges have different downstream buses (self: {this}, other: {other})")] - DownstreamBus { this: u8, other: u8 }, -} - -#[derive(Debug, Error)] -pub enum PvpanicIncompatibility { - #[error("pvpanic presence differs (self: {this}, other: {other})")] - Presence { this: bool, other: bool }, - - #[error( - "pvpanic devices have different names (self: {this:?}, other: {other:?})" - )] - Name { this: String, other: String }, - - #[error( - "pvpanic devices have different ISA settings (self: {this}, other: {other})" - )] - EnableIsa { this: bool, other: bool }, -} - -#[derive(Debug, Error)] -pub enum ComponentIncompatibility { - #[error(transparent)] - Board(#[from] BoardIncompatibility), - - #[error(transparent)] - Disk(#[from] DiskIncompatibility), - - #[error(transparent)] - Nic(#[from] NicIncompatibility), - - #[error(transparent)] - SerialPort(#[from] SerialPortIncompatibility), - - #[error(transparent)] - PciPciBridge(#[from] BridgeIncompatibility), -} - -#[derive(Debug, Error)] -pub enum CollectionIncompatibility { - #[error( - "collections have different lengths (self: {this}, other: {other})" - )] - Length { this: usize, other: usize }, - - #[error("collection key {0} present in self but not other")] - KeyAbsent(String), - - #[error("component {0} incompatible")] - Component(String, #[source] ComponentIncompatibility), -} - -impl> CompatCheck - for HashMap -{ - type Error = CollectionIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), CollectionIncompatibility> { - if self.len() != other.len() { - return Err(CollectionIncompatibility::Length { - this: self.len(), - other: other.len(), - }); - } - - for (key, this_val) in self.iter() { - let other_val = other.get(key).ok_or_else(|| { - CollectionIncompatibility::KeyAbsent(key.clone()) - })?; - - this_val.is_compatible_with(other_val).map_err(|e| { - CollectionIncompatibility::Component(key.clone(), e) - })?; - } - - Ok(()) - } -} - -impl spec::Spec { - fn is_board_compatible( - &self, - other: &Self, - ) -> Result<(), BoardIncompatibility> { - self.is_chipset_compatible(other)?; - self.is_cpuid_compatible(other)?; - - let this = &self.board; - let other = &other.board; - if this.cpus != other.cpus { - Err(BoardIncompatibility::CpuCount { - this: this.cpus, - other: other.cpus, - }) - } else if this.memory_mb != other.memory_mb { - Err(BoardIncompatibility::MemorySize { - this: this.memory_mb, - other: other.memory_mb, - }) - } else { - Ok(()) - } - } - - fn is_chipset_compatible( - &self, - other: &Self, - ) -> Result<(), BoardIncompatibility> { - let Chipset::I440Fx(this) = self.board.chipset; - let Chipset::I440Fx(other) = other.board.chipset; - - if this.enable_pcie != other.enable_pcie { - Err(BoardIncompatibility::PcieEnabled { - this: this.enable_pcie, - other: other.enable_pcie, - }) - } else { - Ok(()) - } - } - - fn is_cpuid_compatible(&self, other: &Self) -> Result<(), CpuidMismatch> { - match (&self.cpuid, &other.cpuid) { - (None, None) => Ok(()), - (Some(_), None) | (None, Some(_)) => { - Err(CpuidMismatch::Explicitness { - this: self.cpuid.is_some(), - other: other.cpuid.is_some(), - }) - } - (Some(this), Some(other)) => { - if this.vendor() != other.vendor() { - return Err(CpuidMismatch::Vendor { - this: this.vendor(), - other: other.vendor(), - }); - } - - this.is_equivalent_to(other)?; - Ok(()) - } - } - } - - fn is_pvpanic_compatible( - &self, - other: &Self, - ) -> Result<(), PvpanicIncompatibility> { - match (&self.pvpanic, &other.pvpanic) { - (None, None) => Ok(()), - (Some(this), Some(other)) if this.name != other.name => { - Err(PvpanicIncompatibility::Name { - this: this.name.clone(), - other: other.name.clone(), - }) - } - (Some(this), Some(other)) - if this.spec.enable_isa != other.spec.enable_isa => - { - Err(PvpanicIncompatibility::EnableIsa { - this: this.spec.enable_isa, - other: other.spec.enable_isa, - }) - } - (Some(_), Some(_)) => Ok(()), - (this, other) => Err(PvpanicIncompatibility::Presence { - this: this.is_some(), - other: other.is_some(), - }), - } - } - - pub(super) fn is_migration_compatible( - &self, - other: &Self, - ) -> Result<(), CompatibilityError> { - self.is_board_compatible(other)?; - self.disks.is_compatible_with(&other.disks).map_err(|e| { - CompatibilityError::Collection("disks".to_string(), e) - })?; - - self.nics.is_compatible_with(&other.nics).map_err(|e| { - CompatibilityError::Collection("nics".to_string(), e) - })?; - - self.serial.is_compatible_with(&other.serial).map_err(|e| { - CompatibilityError::Collection("serial ports".to_string(), e) - })?; - - self.pci_pci_bridges - .is_compatible_with(&other.pci_pci_bridges) - .map_err(|e| { - CompatibilityError::Collection("PCI bridges".to_string(), e) - })?; - - self.is_pvpanic_compatible(other)?; - - #[cfg(feature = "falcon")] - if self.softnpu.has_components() || other.softnpu.has_components() { - return Err(CompatibilityError::SoftNpu); - } - - Ok(()) - } -} - -impl spec::StorageDevice { - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), DiskIncompatibility> { - if std::mem::discriminant(self) != std::mem::discriminant(other) { - Err(DiskIncompatibility::Interface { - this: self.kind(), - other: other.kind(), - }) - } else if self.pci_path() != other.pci_path() { - Err(DiskIncompatibility::PciPath { - this: self.pci_path(), - other: other.pci_path(), - }) - } else if self.backend_name() != other.backend_name() { - Err(DiskIncompatibility::BackendName { - this: self.backend_name().to_owned(), - other: other.backend_name().to_owned(), - }) - } else { - Ok(()) - } - } -} - -impl spec::StorageBackend { - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), DiskIncompatibility> { - if std::mem::discriminant(self) != std::mem::discriminant(other) { - Err(DiskIncompatibility::BackendKind { - this: self.kind(), - other: other.kind(), - }) - } else if self.read_only() != other.read_only() { - Err(DiskIncompatibility::ReadOnly { - this: self.read_only(), - other: other.read_only(), - }) - } else { - Ok(()) - } - } -} - -impl CompatCheck for spec::Disk { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - self.device_spec.is_compatible_with(&other.device_spec)?; - self.backend_spec.is_compatible_with(&other.backend_spec)?; - Ok(()) - } -} - -impl CompatCheck for spec::Nic { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - if self.device_spec.pci_path != other.device_spec.pci_path { - Err(NicIncompatibility::PciPath { - this: self.device_spec.pci_path, - other: other.device_spec.pci_path, - }) - } else if self.device_spec.backend_name - != other.device_spec.backend_name - { - Err(NicIncompatibility::BackendName { - this: self.device_spec.backend_name.clone(), - other: other.device_spec.backend_name.clone(), - }) - } else { - Ok(()) - } - .map_err(ComponentIncompatibility::Nic) - } -} - -impl CompatCheck for spec::SerialPort { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - if self.num != other.num { - Err(SerialPortIncompatibility::Number { - this: self.num, - other: other.num, - }) - } else if std::mem::discriminant(&self.device) - != std::mem::discriminant(&other.device) - { - Err(SerialPortIncompatibility::Device { - this: self.device, - other: other.device, - }) - } else { - Ok(()) - } - .map_err(ComponentIncompatibility::SerialPort) - } -} - -impl CompatCheck for PciPciBridge { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - if self.pci_path != other.pci_path { - Err(BridgeIncompatibility::PciPath { - this: self.pci_path, - other: other.pci_path, - }) - } else if self.downstream_bus != other.downstream_bus { - Err(BridgeIncompatibility::DownstreamBus { - this: self.downstream_bus, - other: other.downstream_bus, - }) - } else { - Ok(()) - } - .map_err(ComponentIncompatibility::PciPciBridge) - } -} - -#[cfg(test)] -mod test { - use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; - use propolis_api_types::instance_spec::components::{ - backends::{ - CrucibleStorageBackend, FileStorageBackend, VirtioNetworkBackend, - }, - board::I440Fx, - devices::{ - NvmeDisk, QemuPvpanic as QemuPvpanicDesc, VirtioDisk, VirtioNic, - }, - }; - use spec::{QemuPvpanic, StorageDevice}; - use uuid::Uuid; - - use super::*; - - fn new_spec() -> spec::Spec { - let mut spec = spec::Spec::default(); - spec.board.cpus = 2; - spec.board.memory_mb = 512; - spec - } - - fn file_backend() -> spec::StorageBackend { - spec::StorageBackend::File(FileStorageBackend { - path: "/tmp/file.raw".to_owned(), - readonly: false, - }) - } - - fn crucible_backend() -> spec::StorageBackend { - spec::StorageBackend::Crucible(CrucibleStorageBackend { - request_json: "{}".to_owned(), - readonly: false, - }) - } - - fn nic() -> spec::Nic { - spec::Nic { - device_spec: VirtioNic { - pci_path: PciPath::new(0, 16, 0).unwrap(), - interface_id: Uuid::new_v4(), - backend_name: "vnic".to_owned(), - }, - backend_spec: VirtioNetworkBackend { - vnic_name: "vnic0".to_owned(), - }, - } - } - - fn serial_port() -> spec::SerialPort { - spec::SerialPort { - num: SerialPortNumber::Com1, - device: SerialPortDevice::Uart, - } - } - - fn bridge() -> PciPciBridge { - PciPciBridge { - downstream_bus: 1, - pci_path: PciPath::new(0, 24, 0).unwrap(), - } - } - - #[test] - fn cpu_mismatch() { - let s1 = new_spec(); - let mut s2 = s1.clone(); - s2.board.cpus += 1; - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn memory_mismatch() { - let s1 = new_spec(); - let mut s2 = s1.clone(); - s2.board.memory_mb *= 2; - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn pcie_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - s1.board.chipset = Chipset::I440Fx(I440Fx { enable_pcie: false }); - s2.board.chipset = Chipset::I440Fx(I440Fx { enable_pcie: true }); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn pvpanic_name_mismatch() { - let mut s1 = new_spec(); - s1.pvpanic = Some(QemuPvpanic { - name: "pvpanic1".to_string(), - spec: QemuPvpanicDesc { enable_isa: true }, - }); - let mut s2 = s1.clone(); - s2.pvpanic.as_mut().unwrap().name = "pvpanic2".to_string(); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn pvpanic_enable_isa_mismatch() { - let mut s1 = new_spec(); - s1.pvpanic = Some(QemuPvpanic { - name: "pvpanic".to_string(), - spec: QemuPvpanicDesc { enable_isa: true }, - }); - let mut s2 = s1.clone(); - s2.pvpanic.as_mut().unwrap().spec.enable_isa = false; - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_disks() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let disk = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_string(), - }), - backend_spec: crucible_backend(), - }; - - s1.disks.insert("disk".to_owned(), disk.clone()); - s2.disks.insert("disk".to_owned(), disk); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn disk_name_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - s1.disks.insert("disk1".to_owned(), d1.clone()); - s2.disks.insert("disk2".to_owned(), d1); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_length_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - s1.disks.insert("disk1".to_owned(), d1.clone()); - s2.disks.insert("disk1".to_owned(), d1.clone()); - s2.disks.insert("disk2".to_owned(), d1); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_interface_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - let mut d2 = d1.clone(); - d2.device_spec = StorageDevice::Nvme(NvmeDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_pci_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - let mut d2 = d1.clone(); - d2.device_spec = StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 5, 0).unwrap(), - backend_name: "backend".to_owned(), - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_backend_name_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - let mut d2 = d1.clone(); - d2.device_spec = StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "other_backend".to_owned(), - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_backend_kind_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: file_backend(), - }; - - let mut d2 = d1.clone(); - d2.backend_spec = crucible_backend(); - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_backend_readonly_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: file_backend(), - }; - - let mut d2 = d1.clone(); - d2.backend_spec = spec::StorageBackend::File(FileStorageBackend { - path: "/tmp/file.raw".to_owned(), - readonly: true, - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_nics() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let nic = nic(); - s1.nics.insert("nic".to_owned(), nic.clone()); - s2.nics.insert("nic".to_owned(), nic); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn nic_pci_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let n1 = nic(); - let mut n2 = n1.clone(); - n2.device_spec.pci_path = PciPath::new(0, 24, 0).unwrap(); - s1.nics.insert("nic".to_owned(), n1); - s2.nics.insert("nic".to_owned(), n2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn nic_backend_name_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let n1 = nic(); - let mut n2 = n1.clone(); - "other_backend".clone_into(&mut n2.device_spec.backend_name); - s1.nics.insert("nic".to_owned(), n1); - s2.nics.insert("nic".to_owned(), n2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_serial_ports() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let serial = serial_port(); - s1.serial.insert("com1".to_owned(), serial.clone()); - s2.serial.insert("com1".to_owned(), serial); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn serial_port_number_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let serial1 = serial_port(); - let mut serial2 = serial1.clone(); - serial2.num = SerialPortNumber::Com2; - s1.serial.insert("com1".to_owned(), serial1); - s2.serial.insert("com1".to_owned(), serial2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_bridges() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let bridge = bridge(); - s1.pci_pci_bridges.insert("bridge1".to_owned(), bridge); - s2.pci_pci_bridges.insert("bridge1".to_owned(), bridge); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn bridge_downstream_bus_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let b1 = bridge(); - let mut b2 = b1; - b2.downstream_bus += 1; - s1.pci_pci_bridges.insert("bridge1".to_owned(), b1); - s2.pci_pci_bridges.insert("bridge1".to_owned(), b2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn bridge_pci_path_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let b1 = bridge(); - let mut b2 = b1; - b2.pci_path = PciPath::new(0, 30, 0).unwrap(); - s1.pci_pci_bridges.insert("bridge1".to_owned(), b1); - s2.pci_pci_bridges.insert("bridge1".to_owned(), b2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_cpuid() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Intel); - let mut set2 = CpuidSet::new(CpuidVendor::Intel); - - s1.cpuid = Some(set1.clone()); - s2.cpuid = Some(set2.clone()); - s1.is_migration_compatible(&s2).unwrap(); - - set1.insert(CpuidIdent::leaf(0x1337), CpuidValues::default()).unwrap(); - set2.insert(CpuidIdent::leaf(0x1337), CpuidValues::default()).unwrap(); - - s1.cpuid = Some(set1.clone()); - s2.cpuid = Some(set2.clone()); - s1.is_migration_compatible(&s2).unwrap(); - - let values = CpuidValues { eax: 5, ebx: 6, ecx: 7, edx: 8 }; - set1.insert(CpuidIdent::subleaf(3, 4), values).unwrap(); - set2.insert(CpuidIdent::subleaf(3, 4), values).unwrap(); - s1.is_migration_compatible(&s2).unwrap(); - } - - #[test] - fn cpuid_explicitness_mismatch() { - let mut s1 = new_spec(); - let s2 = s1.clone(); - s1.cpuid = Some(CpuidSet::new(CpuidVendor::Intel)); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_vendor_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - s1.cpuid = Some(CpuidSet::new(CpuidVendor::Intel)); - s2.cpuid = Some(CpuidSet::new(CpuidVendor::Amd)); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_leaf_set_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Amd); - let mut set2 = CpuidSet::new(CpuidVendor::Amd); - - // Give the first set an entry the second set doesn't have. - set1.insert(CpuidIdent::leaf(0), CpuidValues::default()).unwrap(); - set1.insert(CpuidIdent::leaf(1), CpuidValues::default()).unwrap(); - set2.insert(CpuidIdent::leaf(0), CpuidValues::default()).unwrap(); - - s1.cpuid = Some(set1); - s2.cpuid = Some(set2.clone()); - assert!(s1.is_migration_compatible(&s2).is_err()); - - // Make the sets have the same number of entries, but with a difference - // in which entries they have. - set2.insert(CpuidIdent::leaf(3), CpuidValues::default()).unwrap(); - s2.cpuid = Some(set2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_leaf_value_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Amd); - let mut set2 = CpuidSet::new(CpuidVendor::Amd); - - let v1 = CpuidValues { eax: 4, ebx: 5, ecx: 6, edx: 7 }; - let v2 = CpuidValues { eax: 100, ebx: 200, ecx: 300, edx: 400 }; - set1.insert(CpuidIdent::leaf(0), v1).unwrap(); - set2.insert(CpuidIdent::leaf(0), v2).unwrap(); - s1.cpuid = Some(set1); - s2.cpuid = Some(set2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_leaf_subleaf_conflict() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Amd); - let mut set2 = CpuidSet::new(CpuidVendor::Amd); - - // Check that leaf 0 with no subleaf is not compatible with leaf 0 and a - // subleaf of 0. These are semantically different: the former matches - // leaf 0 with any subleaf value, while the latter technically matches - // only leaf 0 and subleaf 0 (with leaf-specific behavior if a different - // subleaf is specified). - set1.insert(CpuidIdent::leaf(0), CpuidValues::default()).unwrap(); - set2.insert(CpuidIdent::subleaf(0, 0), CpuidValues::default()).unwrap(); - s1.cpuid = Some(set1); - s2.cpuid = Some(set2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } -} diff --git a/bin/propolis-server/src/lib/migrate/destination.rs b/bin/propolis-server/src/lib/migrate/destination.rs index eb5982f15..30c928d16 100644 --- a/bin/propolis-server/src/lib/migrate/destination.rs +++ b/bin/propolis-server/src/lib/migrate/destination.rs @@ -27,6 +27,7 @@ use crate::migrate::probes; use crate::migrate::{ Device, MigrateError, MigratePhase, MigrateRole, MigrationState, PageIter, }; +use crate::spec::Spec; use crate::vm::ensure::{VmEnsureActive, VmEnsureNotStarted}; use crate::vm::state_publisher::{ ExternalStateUpdate, MigrationStateUpdate, StatePublisher, @@ -171,21 +172,24 @@ impl DestinationProtocol for RonV0 { let result = async { // Run the sync phase to ensure that the source's instance spec is // compatible with the spec supplied in the ensure parameters. - if let Err(e) = self.run_sync_phases(&mut ensure).await { - self.update_state( - ensure.state_publisher(), - MigrationState::Error, - ); - let e = ensure.fail(e.into()).await; - return Err(e - .downcast::() - .expect("original error was a MigrateError")); - } + let spec = match self.run_sync_phases(&mut ensure).await { + Ok(spec) => spec, + Err(e) => { + self.update_state( + ensure.state_publisher(), + MigrationState::Error, + ); + let e = ensure.fail(e.into()).await; + return Err(e + .downcast::() + .expect("original error was a MigrateError")); + } + }; // The sync phase succeeded, so it's OK to go ahead with creating // the objects in the target's instance spec. let mut objects_created = - ensure.create_objects().await.map_err(|e| { + ensure.create_objects_from_spec(spec).await.map_err(|e| { MigrateError::TargetInstanceInitializationFailed( e.to_string(), ) @@ -260,14 +264,14 @@ impl RonV0 { async fn run_sync_phases( &mut self, ensure_ctx: &mut VmEnsureNotStarted<'_>, - ) -> Result<(), MigrateError> { + ) -> Result { let step = MigratePhase::MigrateSync; probes::migrate_phase_begin!(|| { step.to_string() }); - self.sync(ensure_ctx).await?; + let result = self.sync(ensure_ctx).await; probes::migrate_phase_end!(|| { step.to_string() }); - Ok(()) + result } async fn run_import_phases( @@ -330,7 +334,7 @@ impl RonV0 { async fn sync( &mut self, ensure_ctx: &mut VmEnsureNotStarted<'_>, - ) -> Result<(), MigrateError> { + ) -> Result { self.update_state(ensure_ctx.state_publisher(), MigrationState::Sync); let preamble: Preamble = match self.read_msg().await? { codec::Message::Serialized(s) => { @@ -346,18 +350,22 @@ impl RonV0 { }?; info!(self.log(), "Destination read Preamble: {:?}", preamble); - if let Err(e) = - preamble.check_compatibility(&ensure_ctx.instance_spec().clone()) - { - error!( - self.log(), - "source and destination instance specs incompatible"; - "error" => #%e - ); - return Err(MigrateError::InstanceSpecsIncompatible(e.to_string())); - } + let spec = match preamble.get_amended_spec(ensure_ctx.instance_spec()) { + Ok(spec) => spec, + Err(e) => { + error!( + self.log(), + "source and destination instance specs incompatible"; + "error" => #%e + ); + return Err(MigrateError::InstanceSpecsIncompatible( + e.to_string(), + )); + } + }; - self.send_msg(codec::Message::Okay).await + self.send_msg(codec::Message::Okay).await?; + Ok(spec) } async fn ram_push( diff --git a/bin/propolis-server/src/lib/migrate/mod.rs b/bin/propolis-server/src/lib/migrate/mod.rs index 6ccb6f8fc..033fdd0c8 100644 --- a/bin/propolis-server/src/lib/migrate/mod.rs +++ b/bin/propolis-server/src/lib/migrate/mod.rs @@ -12,7 +12,6 @@ use thiserror::Error; use tokio::io::{AsyncRead, AsyncWrite}; mod codec; -mod compat; pub mod destination; mod memx; mod preamble; diff --git a/bin/propolis-server/src/lib/migrate/preamble.rs b/bin/propolis-server/src/lib/migrate/preamble.rs index f80c4a767..082735fc5 100644 --- a/bin/propolis-server/src/lib/migrate/preamble.rs +++ b/bin/propolis-server/src/lib/migrate/preamble.rs @@ -2,10 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use propolis_api_types::instance_spec::VersionedInstanceSpec; +use propolis_api_types::instance_spec::{ + v0::ComponentV0, VersionedInstanceSpec, +}; use serde::{Deserialize, Serialize}; -use crate::spec::{self, api_spec_v0::ApiSpecError}; +use crate::spec::{api_spec_v0::ApiSpecError, Spec, StorageBackend}; use super::MigrateError; @@ -20,26 +22,68 @@ impl Preamble { Preamble { instance_spec, blobs: Vec::new() } } - /// Checks to see if the serialized spec in this preamble is compatible with - /// the supplied `other_spec`. + /// Given the instance spec in this preamble (transmitted from the migration + /// source), replace any Crucible and viona backends with their /// /// This check runs on the destination. - pub fn check_compatibility( + pub fn get_amended_spec( self, - other_spec: &spec::Spec, - ) -> Result<(), MigrateError> { - let VersionedInstanceSpec::V0(v0_spec) = self.instance_spec; - let this_spec: spec::Spec = - v0_spec.try_into().map_err(|e: ApiSpecError| { + target_spec: &Spec, + ) -> Result { + let VersionedInstanceSpec::V0(mut source_spec) = self.instance_spec; + for disk in target_spec.disks.values() { + let StorageBackend::Crucible(crucible) = &disk.backend_spec else { + continue; + }; + + let Some(to_amend) = + source_spec.components.get_mut(disk.device_spec.backend_name()) + else { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "replacement component {} not in source spec", + disk.device_spec.backend_name() + ))); + }; + + if !matches!(to_amend, ComponentV0::CrucibleStorageBackend(_)) { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "component {} is not a Crucible backend in the source spec", + disk.device_spec.backend_name() + ))); + } + + *to_amend = ComponentV0::CrucibleStorageBackend(crucible.clone()); + } + + for nic in target_spec.nics.values() { + let Some(to_amend) = + source_spec.components.get_mut(&nic.device_spec.backend_name) + else { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "replacement component {} not in source spec", + nic.device_spec.backend_name + ))); + }; + + if !matches!(to_amend, ComponentV0::VirtioNetworkBackend(_)) { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "component {} is not a virtio network backend \ + in the source spec", + nic.device_spec.backend_name + ))); + } + + *to_amend = + ComponentV0::VirtioNetworkBackend(nic.backend_spec.clone()); + } + + let amended_spec = + source_spec.try_into().map_err(|e: ApiSpecError| { MigrateError::PreambleParse(e.to_string()) })?; - this_spec.is_migration_compatible(other_spec).map_err(|e| { - MigrateError::InstanceSpecsIncompatible(e.to_string()) - })?; - // TODO: Compare opaque blobs. - Ok(()) + Ok(amended_spec) } } diff --git a/bin/propolis-server/src/lib/spec/mod.rs b/bin/propolis-server/src/lib/spec/mod.rs index 414453553..c87c5a431 100644 --- a/bin/propolis-server/src/lib/spec/mod.rs +++ b/bin/propolis-server/src/lib/spec/mod.rs @@ -294,17 +294,6 @@ pub struct SoftNpu { pub p9fs: Option, } -#[cfg(feature = "falcon")] -impl SoftNpu { - /// Returns `true` if this struct specifies at least one SoftNPU component. - pub fn has_components(&self) -> bool { - self.pci_port.is_some() - || self.p9_device.is_some() - || self.p9fs.is_some() - || !self.ports.is_empty() - } -} - struct ParsedDiskRequest { name: String, disk: Disk, diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index ff4fec107..f3e1cd7dd 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -41,7 +41,10 @@ use crate::{ }, spec::Spec, stats::{create_kstat_sampler, VirtualMachine}, - vm::request_queue::InstanceAutoStart, + vm::{ + request_queue::InstanceAutoStart, VMM_BASE_RT_THREADS, + VMM_MIN_RT_THREADS, + }, }; use super::{ @@ -64,7 +67,13 @@ pub(crate) struct VmEnsureNotStarted<'a> { log: &'a slog::Logger, vm: &'a Arc, ensure_request: &'a VmEnsureRequest, - ensure_options: &'a EnsureOptions, + + // VM objects are created on a separate tokio task from the one that drives + // the instance ensure state machine. This task needs its own copy of the + // ensure options. `EnsureOptions` is not `Clone`, so take a reference to an + // `Arc` wrapper around the options to have something that can be cloned and + // passed to the ensure task. + ensure_options: &'a Arc, ensure_response_tx: InstanceEnsureResponseTx, state_publisher: &'a mut StatePublisher, } @@ -74,7 +83,7 @@ impl<'a> VmEnsureNotStarted<'a> { log: &'a slog::Logger, vm: &'a Arc, ensure_request: &'a VmEnsureRequest, - ensure_options: &'a EnsureOptions, + ensure_options: &'a Arc, ensure_response_tx: InstanceEnsureResponseTx, state_publisher: &'a mut StatePublisher, ) -> Self { @@ -96,10 +105,25 @@ impl<'a> VmEnsureNotStarted<'a> { self.state_publisher } + pub(crate) async fn create_objects_from_request( + self, + ) -> anyhow::Result> { + let spec = self.ensure_request.instance_spec.clone(); + self.create_objects(spec).await + } + + pub(crate) async fn create_objects_from_spec( + self, + spec: Spec, + ) -> anyhow::Result> { + self.create_objects(spec).await + } + /// Creates a set of VM objects using the instance spec stored in this /// ensure request, but does not install them as an active VM. - pub(crate) async fn create_objects( + async fn create_objects( self, + spec: Spec, ) -> anyhow::Result> { debug!(self.log, "creating VM objects"); @@ -111,7 +135,38 @@ impl<'a> VmEnsureNotStarted<'a> { }, )); - match self.initialize_vm_objects_from_spec(&input_queue).await { + // Create the runtime that will host tasks created by VMM components + // (e.g. block device runtime tasks). + let vmm_rt = tokio::runtime::Builder::new_multi_thread() + .thread_name("tokio-rt-vmm") + .worker_threads(usize::max( + VMM_MIN_RT_THREADS, + VMM_BASE_RT_THREADS + spec.board.cpus as usize, + )) + .enable_all() + .build()?; + + let log_for_init = self.log.clone(); + let properties = self.ensure_request.properties.clone(); + let options = self.ensure_options.clone(); + let queue_for_init = input_queue.clone(); + let init_result = vmm_rt + .spawn(async move { + initialize_vm_objects( + log_for_init, + spec, + properties, + options, + queue_for_init, + ) + .await + }) + .await + .map_err(|e| { + anyhow::anyhow!("failed to join VM object creation task: {e}") + })?; + + match init_result { Ok(objects) => { // N.B. Once these `VmObjects` exist, it is no longer safe to // call `vm_init_failed`. @@ -124,6 +179,7 @@ impl<'a> VmEnsureNotStarted<'a> { Ok(VmEnsureObjectsCreated { log: self.log, vm: self.vm, + vmm_rt, ensure_request: self.ensure_request, ensure_options: self.ensure_options, ensure_response_tx: self.ensure_response_tx, @@ -148,114 +204,6 @@ impl<'a> VmEnsureNotStarted<'a> { reason } - - async fn initialize_vm_objects_from_spec( - &self, - event_queue: &Arc, - ) -> anyhow::Result { - let properties = &self.ensure_request.properties; - let spec = &self.ensure_request.instance_spec; - let options = self.ensure_options; - - info!(self.log, "initializing new VM"; - "spec" => #?spec, - "properties" => #?properties, - "use_reservoir" => options.use_reservoir, - "bootrom" => %options.toml_config.bootrom.display()); - - let vmm_log = self.log.new(slog::o!("component" => "vmm")); - - // Set up the 'shell' instance into which the rest of this routine will - // add components. - let machine = build_instance( - &properties.vm_name(), - spec, - options.use_reservoir, - vmm_log, - )?; - - let mut init = MachineInitializer { - log: self.log.clone(), - machine: &machine, - devices: Default::default(), - block_backends: Default::default(), - crucible_backends: Default::default(), - spec, - properties, - toml_config: &options.toml_config, - producer_registry: options.oximeter_registry.clone(), - state: MachineInitializerState::default(), - kstat_sampler: initialize_kstat_sampler( - self.log, - self.instance_spec(), - options.oximeter_registry.clone(), - ), - stats_vm: VirtualMachine::new(spec.board.cpus, properties), - }; - - init.initialize_rom(options.toml_config.bootrom.as_path())?; - let chipset = init.initialize_chipset( - &(event_queue.clone() - as Arc), - )?; - - init.initialize_rtc(&chipset)?; - init.initialize_hpet(); - - let com1 = Arc::new(init.initialize_uart(&chipset)); - let ps2ctrl = init.initialize_ps2(&chipset); - init.initialize_qemu_debug_port()?; - init.initialize_qemu_pvpanic(VirtualMachine::new( - self.instance_spec().board.cpus, - properties, - ))?; - init.initialize_network_devices(&chipset).await?; - - #[cfg(not(feature = "omicron-build"))] - init.initialize_test_devices(&options.toml_config.devices); - #[cfg(feature = "omicron-build")] - info!( - self.log, - "`omicron-build` feature enabled, ignoring any test devices" - ); - - #[cfg(feature = "falcon")] - { - init.initialize_softnpu_ports(&chipset)?; - init.initialize_9pfs(&chipset); - } - - init.initialize_storage_devices(&chipset, options.nexus_client.clone()) - .await?; - - let ramfb = init.initialize_fwcfg(self.instance_spec().board.cpus)?; - init.initialize_cpus().await?; - let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( - &machine, - event_queue.clone() - as Arc, - self.log.new(slog::o!("component" => "vcpu_tasks")), - )?); - - let MachineInitializer { - devices, - block_backends, - crucible_backends, - .. - } = init; - - Ok(InputVmObjects { - instance_spec: spec.clone(), - vcpu_tasks, - machine, - devices, - block_backends, - crucible_backends, - com1, - framebuffer: Some(ramfb), - ps2ctrl, - }) - } } /// Represents an instance ensure request that has proceeded far enough to @@ -264,6 +212,7 @@ impl<'a> VmEnsureNotStarted<'a> { pub(crate) struct VmEnsureObjectsCreated<'a> { log: &'a slog::Logger, vm: &'a Arc, + vmm_rt: tokio::runtime::Runtime, ensure_request: &'a VmEnsureRequest, ensure_options: &'a EnsureOptions, ensure_response_tx: InstanceEnsureResponseTx, @@ -301,12 +250,14 @@ impl<'a> VmEnsureObjectsCreated<'a> { ) .await; + let vmm_rt_hdl = self.vmm_rt.handle().clone(); self.vm .make_active( self.log, self.input_queue.clone(), &self.vm_objects, vm_services, + self.vmm_rt, ) .await; @@ -325,6 +276,7 @@ impl<'a> VmEnsureObjectsCreated<'a> { VmEnsureActive { vm: self.vm, + vmm_rt_hdl, state_publisher: self.state_publisher, vm_objects: self.vm_objects, input_queue: self.input_queue, @@ -338,12 +290,19 @@ impl<'a> VmEnsureObjectsCreated<'a> { /// not started yet. pub(crate) struct VmEnsureActive<'a> { vm: &'a Arc, + vmm_rt_hdl: tokio::runtime::Handle, state_publisher: &'a mut StatePublisher, vm_objects: Arc, input_queue: Arc, kernel_vm_paused: bool, } +pub(super) struct VmEnsureActiveOutput { + pub vm_objects: Arc, + pub input_queue: Arc, + pub vmm_rt_hdl: tokio::runtime::Handle, +} + impl<'a> VmEnsureActive<'a> { pub(crate) fn vm_objects(&self) -> &Arc { &self.vm_objects @@ -373,11 +332,115 @@ impl<'a> VmEnsureActive<'a> { /// Yields the VM objects and input queue for this VM so that they can be /// used to start a state driver loop. - pub(super) fn into_inner(self) -> (Arc, Arc) { - (self.vm_objects, self.input_queue) + pub(super) fn into_inner(self) -> VmEnsureActiveOutput { + VmEnsureActiveOutput { + vm_objects: self.vm_objects, + input_queue: self.input_queue, + vmm_rt_hdl: self.vmm_rt_hdl, + } } } +async fn initialize_vm_objects( + log: slog::Logger, + spec: Spec, + properties: InstanceProperties, + options: Arc, + event_queue: Arc, +) -> anyhow::Result { + info!(log, "initializing new VM"; + "spec" => #?spec, + "properties" => #?properties, + "use_reservoir" => options.use_reservoir, + "bootrom" => %options.toml_config.bootrom.display()); + + let vmm_log = log.new(slog::o!("component" => "vmm")); + + // Set up the 'shell' instance into which the rest of this routine will + // add components. + let machine = build_instance( + &properties.vm_name(), + &spec, + options.use_reservoir, + vmm_log, + )?; + + let mut init = MachineInitializer { + log: log.clone(), + machine: &machine, + devices: Default::default(), + block_backends: Default::default(), + crucible_backends: Default::default(), + spec: &spec, + properties: &properties, + toml_config: &options.toml_config, + producer_registry: options.oximeter_registry.clone(), + state: MachineInitializerState::default(), + kstat_sampler: initialize_kstat_sampler( + &log, + &spec, + options.oximeter_registry.clone(), + ), + stats_vm: VirtualMachine::new(spec.board.cpus, &properties), + }; + + init.initialize_rom(options.toml_config.bootrom.as_path())?; + let chipset = init.initialize_chipset( + &(event_queue.clone() + as Arc), + )?; + + init.initialize_rtc(&chipset)?; + init.initialize_hpet(); + + let com1 = Arc::new(init.initialize_uart(&chipset)); + let ps2ctrl = init.initialize_ps2(&chipset); + init.initialize_qemu_debug_port()?; + init.initialize_qemu_pvpanic(VirtualMachine::new( + spec.board.cpus, + &properties, + ))?; + init.initialize_network_devices(&chipset).await?; + + #[cfg(not(feature = "omicron-build"))] + init.initialize_test_devices(&options.toml_config.devices); + #[cfg(feature = "omicron-build")] + info!(log, "`omicron-build` feature enabled, ignoring any test devices"); + + #[cfg(feature = "falcon")] + { + init.initialize_softnpu_ports(&chipset)?; + init.initialize_9pfs(&chipset); + } + + init.initialize_storage_devices(&chipset, options.nexus_client.clone()) + .await?; + + let ramfb = init.initialize_fwcfg(spec.board.cpus)?; + init.initialize_cpus().await?; + let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( + &machine, + event_queue.clone() as Arc, + log.new(slog::o!("component" => "vcpu_tasks")), + )?); + + let MachineInitializer { + devices, block_backends, crucible_backends, .. + } = init; + + Ok(InputVmObjects { + instance_spec: spec.clone(), + vcpu_tasks, + machine, + devices, + block_backends, + crucible_backends, + com1, + framebuffer: Some(ramfb), + ps2ctrl, + }) +} + /// Create an object used to sample kstats. fn initialize_kstat_sampler( log: &slog::Logger, diff --git a/bin/propolis-server/src/lib/vm/mod.rs b/bin/propolis-server/src/lib/vm/mod.rs index fea20119f..abb6cfcff 100644 --- a/bin/propolis-server/src/lib/vm/mod.rs +++ b/bin/propolis-server/src/lib/vm/mod.rs @@ -179,9 +179,6 @@ pub(crate) enum VmError { #[error("Forbidden state change")] ForbiddenStateChange(#[from] request_queue::RequestDeniedReason), - - #[error("Failed to initialize VM's tokio runtime")] - TokioRuntimeInitializationFailed(#[source] std::io::Error), } /// The top-level VM wrapper type. @@ -383,6 +380,7 @@ impl Vm { state_driver_queue: Arc, objects: &Arc, services: services::VmServices, + vmm_rt: tokio::runtime::Runtime, ) { info!(self.log, "installing active VM"); let mut guard = self.inner.write().await; @@ -396,7 +394,7 @@ impl Vm { properties: vm.properties, objects: objects.clone(), services, - tokio_rt: vm.tokio_rt.expect("WaitingForInit has runtime"), + tokio_rt: vmm_rt, }); } state => unreachable!( @@ -561,23 +559,10 @@ impl Vm { _ => {} }; - let thread_count = usize::max( - VMM_MIN_RT_THREADS, - VMM_BASE_RT_THREADS - + ensure_request.instance_spec.board.cpus as usize, - ); - - let tokio_rt = tokio::runtime::Builder::new_multi_thread() - .thread_name("tokio-rt-vmm") - .worker_threads(thread_count) - .enable_all() - .build() - .map_err(VmError::TokioRuntimeInitializationFailed)?; - let properties = ensure_request.properties.clone(); let spec = ensure_request.instance_spec.clone(); let vm_for_driver = self.clone(); - guard.driver = Some(tokio_rt.spawn(async move { + guard.driver = Some(tokio::spawn(async move { state_driver::run_state_driver( log_for_driver, vm_for_driver, @@ -593,7 +578,7 @@ impl Vm { external_state_rx: external_rx.clone(), properties, spec, - tokio_rt: Some(tokio_rt), + tokio_rt: None, }); } diff --git a/bin/propolis-server/src/lib/vm/state_driver.rs b/bin/propolis-server/src/lib/vm/state_driver.rs index a0298b4e4..73e1768be 100644 --- a/bin/propolis-server/src/lib/vm/state_driver.rs +++ b/bin/propolis-server/src/lib/vm/state_driver.rs @@ -27,7 +27,10 @@ use crate::{ }; use super::{ - ensure::{VmEnsureActive, VmEnsureNotStarted, VmEnsureRequest}, + ensure::{ + VmEnsureActive, VmEnsureActiveOutput, VmEnsureNotStarted, + VmEnsureRequest, + }, guest_event::{self, GuestEvent}, objects::VmObjects, request_queue::{self, ExternalRequest, InstanceAutoStart}, @@ -264,6 +267,7 @@ pub(super) async fn run_state_driver( ensure_result_tx: InstanceEnsureResponseTx, ensure_options: super::EnsureOptions, ) -> StateDriverOutput { + let ensure_options = Arc::new(ensure_options); let activated_vm = match create_and_activate_vm( &log, &vm, @@ -284,10 +288,12 @@ pub(super) async fn run_state_driver( } }; - let (objects, input_queue) = activated_vm.into_inner(); + let VmEnsureActiveOutput { vm_objects, input_queue, vmm_rt_hdl } = + activated_vm.into_inner(); + let state_driver = StateDriver { log, - objects, + objects: vm_objects, input_queue, external_state: state_publisher, paused: false, @@ -296,9 +302,18 @@ pub(super) async fn run_state_driver( // Run the VM until it exits, then set rundown on the parent VM so that no // new external callers can access its objects or services. - let output = state_driver.run(ensure_request.migrate.is_some()).await; - vm.set_rundown().await; - output + match vmm_rt_hdl + .spawn(async move { + let output = + state_driver.run(ensure_request.migrate.is_some()).await; + vm.set_rundown().await; + output + }) + .await + { + Ok(output) => output, + Err(e) => panic!("failed to join state driver task: {e}"), + } } /// Processes the supplied `ensure_request` to create a set of VM objects that @@ -309,7 +324,7 @@ async fn create_and_activate_vm<'a>( state_publisher: &'a mut StatePublisher, ensure_request: &'a VmEnsureRequest, ensure_result_tx: InstanceEnsureResponseTx, - ensure_options: &'a super::EnsureOptions, + ensure_options: &'a Arc, ) -> anyhow::Result> { let ensure = VmEnsureNotStarted::new( log, @@ -347,7 +362,7 @@ async fn create_and_activate_vm<'a>( .context("running live migration protocol")?) } else { let created = ensure - .create_objects() + .create_objects_from_request() .await .context("creating VM objects for new instance")?; diff --git a/phd-tests/tests/src/migrate.rs b/phd-tests/tests/src/migrate.rs index 78438be79..a3506d107 100644 --- a/phd-tests/tests/src/migrate.rs +++ b/phd-tests/tests/src/migrate.rs @@ -274,56 +274,6 @@ mod running_process { } } -#[phd_testcase] -async fn incompatible_vms(ctx: &Framework) { - let mut builders = vec![ - ctx.vm_config_builder("migration_incompatible_target_1"), - ctx.vm_config_builder("migration_incompatible_target_2"), - ]; - - builders[0].cpus(8); - builders[1].memory_mib(1024); - - for (i, cfg) in builders.into_iter().enumerate() { - let mut source = ctx - .spawn_vm( - ctx.vm_config_builder(&format!( - "migration_incompatible_source_{}", - i - )) - .cpus(4) - .memory_mib(512), - None, - ) - .await?; - - source.launch().await?; - let mut target = ctx.spawn_vm(&cfg, None).await?; - - let migration_id = Uuid::new_v4(); - assert!(target - .migrate_from(&source, migration_id, MigrationTimeout::default()) - .await - .is_err()); - - let src_migration_state = source - .get_migration_state() - .await? - .migration_out - .expect("source should have a migration-out status") - .state; - assert_eq!(src_migration_state, MigrationState::Error); - - let target_migration_state = target - .get_migration_state() - .await? - .migration_in - .expect("target should have a migration-in status") - .state; - assert_eq!(target_migration_state, MigrationState::Error); - } -} - #[phd_testcase] async fn multiple_migrations(ctx: &Framework) { let mut vm0 = ctx.spawn_default_vm("multiple_migrations_0").await?; From 8aa1010d69cdfa2162bb385cccfc6db18dc2cbc7 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 12 Nov 2024 22:18:21 +0000 Subject: [PATCH 2/3] handle absence of specs during migration --- bin/propolis-cli/src/main.rs | 9 +- bin/propolis-server/src/lib/vm/mod.rs | 128 +++++++++++++++++--------- crates/propolis-api-types/src/lib.rs | 9 +- openapi/propolis-server.json | 38 +++++++- phd-tests/tests/src/cpuid.rs | 11 ++- phd-tests/tests/src/smoke.rs | 8 +- 6 files changed, 151 insertions(+), 52 deletions(-) diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index ba00f0a87..152e95178 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -15,7 +15,9 @@ use anyhow::{anyhow, Context}; use clap::{Parser, Subcommand}; use futures::{future, SinkExt}; use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag}; -use propolis_client::types::{InstanceMetadata, VersionedInstanceSpec}; +use propolis_client::types::{ + InstanceMetadata, InstanceSpecStatus, VersionedInstanceSpec, +}; use slog::{o, Drain, Level, Logger}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio_tungstenite::tungstenite::{ @@ -504,7 +506,10 @@ async fn migrate_instance( anyhow!("failed to get src instance properties") })?; let src_uuid = src_instance.properties.id; - let VersionedInstanceSpec::V0(spec) = &src_instance.spec; + let InstanceSpecStatus::Present(spec) = &src_instance.spec else { + anyhow::bail!("source instance doesn't have a spec yet"); + }; + let VersionedInstanceSpec::V0(spec) = &spec; let request = InstanceEnsureRequest { properties: InstanceProperties { diff --git a/bin/propolis-server/src/lib/vm/mod.rs b/bin/propolis-server/src/lib/vm/mod.rs index abb6cfcff..258c1976f 100644 --- a/bin/propolis-server/src/lib/vm/mod.rs +++ b/bin/propolis-server/src/lib/vm/mod.rs @@ -87,8 +87,8 @@ use oximeter::types::ProducerRegistry; use propolis_api_types::{ instance_spec::VersionedInstanceSpec, InstanceEnsureResponse, InstanceMigrateStatusResponse, InstanceMigrationStatus, InstanceProperties, - InstanceSpecGetResponse, InstanceState, InstanceStateMonitorResponse, - MigrationState, + InstanceSpecGetResponse, InstanceSpecStatus, InstanceState, + InstanceStateMonitorResponse, MigrationState, }; use slog::info; use state_driver::StateDriverOutput; @@ -204,6 +204,29 @@ struct VmInner { driver: Option>, } +/// Stores a possibly-absent instance spec with a reason for its absence. +#[derive(Clone, Debug)] +enum MaybeSpec { + Present(Spec), + + /// The spec is not known yet because the VM is initializing via live + /// migration, and the source's spec is not available yet. + WaitingForMigrationSource, +} + +impl From for InstanceSpecStatus { + fn from(value: MaybeSpec) -> Self { + match value { + MaybeSpec::WaitingForMigrationSource => { + Self::WaitingForMigrationSource + } + MaybeSpec::Present(spec) => { + Self::Present(VersionedInstanceSpec::V0(spec.into())) + } + } + } +} + /// Describes a past or future VM and its properties. struct VmDescription { /// Records the VM's last externally-visible state. @@ -212,9 +235,6 @@ struct VmDescription { /// The VM's API-level instance properties. properties: InstanceProperties, - /// The VM's last-known instance specification. - spec: Spec, - /// The runtime on which the VM's state driver is running (or on which it /// ran). tokio_rt: Option, @@ -228,17 +248,17 @@ enum VmState { /// A new state driver is attempting to initialize objects for a VM with the /// ecnlosed description. - WaitingForInit(VmDescription), + WaitingForInit { vm: VmDescription, spec: MaybeSpec }, /// The VM is active, and callers can obtain a handle to its objects. Active(active::ActiveVm), /// The previous VM is shutting down, but its objects have not been fully /// destroyed yet. - Rundown(VmDescription), + Rundown { vm: VmDescription, spec: Spec }, /// The previous VM and its objects have been cleaned up. - RundownComplete(VmDescription), + RundownComplete { vm: VmDescription, spec: MaybeSpec }, } impl std::fmt::Display for VmState { @@ -248,10 +268,10 @@ impl std::fmt::Display for VmState { "{}", match self { Self::NoVm => "NoVm", - Self::WaitingForInit(_) => "WaitingForInit", + Self::WaitingForInit { .. } => "WaitingForInit", Self::Active(_) => "Active", - Self::Rundown(_) => "Rundown", - Self::RundownComplete(_) => "RundownComplete", + Self::Rundown { .. } => "Rundown", + Self::RundownComplete { .. } => "RundownComplete", } ) } @@ -329,20 +349,26 @@ impl Vm { let state = vm.external_state_rx.borrow().clone(); Some(InstanceSpecGetResponse { properties: vm.properties.clone(), - spec: VersionedInstanceSpec::V0(spec.into()), + spec: InstanceSpecStatus::Present( + VersionedInstanceSpec::V0(spec.into()), + ), state: state.state, }) } - - // If the VM is not active yet, or there is only a - // previously-run-down VM, return the state saved in the state - // machine. - VmState::WaitingForInit(vm) - | VmState::Rundown(vm) - | VmState::RundownComplete(vm) => Some(InstanceSpecGetResponse { + VmState::WaitingForInit { vm, spec } + | VmState::RundownComplete { vm, spec } => { + Some(InstanceSpecGetResponse { + properties: vm.properties.clone(), + state: vm.external_state_rx.borrow().state, + spec: spec.clone().into(), + }) + } + VmState::Rundown { vm, spec } => Some(InstanceSpecGetResponse { properties: vm.properties.clone(), state: vm.external_state_rx.borrow().state, - spec: VersionedInstanceSpec::V0(vm.spec.clone().into()), + spec: InstanceSpecStatus::Present(VersionedInstanceSpec::V0( + spec.clone().into(), + )), }), } } @@ -359,9 +385,9 @@ impl Vm { match &guard.state { VmState::NoVm => None, VmState::Active(vm) => Some(vm.external_state_rx.clone()), - VmState::WaitingForInit(vm) - | VmState::Rundown(vm) - | VmState::RundownComplete(vm) => { + VmState::WaitingForInit { vm, .. } + | VmState::Rundown { vm, .. } + | VmState::RundownComplete { vm, .. } => { Some(vm.external_state_rx.clone()) } } @@ -386,7 +412,7 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); match old { - VmState::WaitingForInit(vm) => { + VmState::WaitingForInit { vm, .. } => { guard.state = VmState::Active(ActiveVm { log: log.clone(), state_driver_queue, @@ -417,8 +443,8 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); match old { - VmState::WaitingForInit(vm) => { - guard.state = VmState::RundownComplete(vm) + VmState::WaitingForInit { vm, spec } => { + guard.state = VmState::RundownComplete { vm, spec } } state => unreachable!( "start failures should only occur before an active VM is \ @@ -449,12 +475,14 @@ impl Vm { let spec = vm.objects().lock_shared().await.instance_spec().clone(); let ActiveVm { external_state_rx, properties, tokio_rt, .. } = vm; - guard.state = VmState::Rundown(VmDescription { - external_state_rx, - properties, + guard.state = VmState::Rundown { + vm: VmDescription { + external_state_rx, + properties, + tokio_rt: Some(tokio_rt), + }, spec, - tokio_rt: Some(tokio_rt), - }); + }; vm.services }; @@ -473,9 +501,12 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); let rt = match old { - VmState::Rundown(mut vm) => { + VmState::Rundown { mut vm, spec } => { let rt = vm.tokio_rt.take().expect("rundown VM has a runtime"); - guard.state = VmState::RundownComplete(vm); + guard.state = VmState::RundownComplete { + vm, + spec: MaybeSpec::Present(spec), + }; rt } state => unreachable!( @@ -551,16 +582,25 @@ impl Vm { { let mut guard = self.inner.write().await; match guard.state { - VmState::WaitingForInit(_) => { - return Err(VmError::WaitingToInitialize); + VmState::WaitingForInit { .. } => { + return Err(VmError::WaitingToInitialize) + } + VmState::Active { .. } => { + return Err(VmError::AlreadyInitialized) + } + VmState::Rundown { .. } => { + return Err(VmError::RundownInProgress) } - VmState::Active(_) => return Err(VmError::AlreadyInitialized), - VmState::Rundown(_) => return Err(VmError::RundownInProgress), _ => {} }; let properties = ensure_request.properties.clone(); - let spec = ensure_request.instance_spec.clone(); + let spec = if ensure_request.migrate.is_some() { + MaybeSpec::WaitingForMigrationSource + } else { + MaybeSpec::Present(ensure_request.instance_spec.clone()) + }; + let vm_for_driver = self.clone(); guard.driver = Some(tokio::spawn(async move { state_driver::run_state_driver( @@ -574,12 +614,14 @@ impl Vm { .await })); - guard.state = VmState::WaitingForInit(VmDescription { - external_state_rx: external_rx.clone(), - properties, + guard.state = VmState::WaitingForInit { + vm: VmDescription { + external_state_rx: external_rx.clone(), + properties, + tokio_rt: None, + }, spec, - tokio_rt: None, - }); + }; } // Wait for the state driver task to dispose of this request. diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 44bd7a68d..db007667c 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -165,11 +165,18 @@ pub struct InstanceGetResponse { pub instance: Instance, } +#[derive(Clone, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", content = "value")] +pub enum InstanceSpecStatus { + WaitingForMigrationSource, + Present(VersionedInstanceSpec), +} + #[derive(Clone, Deserialize, Serialize, JsonSchema)] pub struct InstanceSpecGetResponse { pub properties: InstanceProperties, pub state: InstanceState, - pub spec: VersionedInstanceSpec, + pub spec: InstanceSpecStatus, } #[derive(Clone, Deserialize, Serialize, JsonSchema)] diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 964b630c8..aba9a1c03 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1431,7 +1431,7 @@ "$ref": "#/components/schemas/InstanceProperties" }, "spec": { - "$ref": "#/components/schemas/VersionedInstanceSpec" + "$ref": "#/components/schemas/InstanceSpecStatus" }, "state": { "$ref": "#/components/schemas/InstanceState" @@ -1443,6 +1443,42 @@ "state" ] }, + "InstanceSpecStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "WaitingForMigrationSource" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "Present" + ] + }, + "value": { + "$ref": "#/components/schemas/VersionedInstanceSpec" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, "InstanceSpecV0": { "type": "object", "properties": { diff --git a/phd-tests/tests/src/cpuid.rs b/phd-tests/tests/src/cpuid.rs index 3ec773761..9d1a4e7c0 100644 --- a/phd-tests/tests/src/cpuid.rs +++ b/phd-tests/tests/src/cpuid.rs @@ -4,7 +4,9 @@ use cpuid_utils::{CpuidIdent, CpuidValues, CpuidVendor}; use phd_testcase::*; -use propolis_client::types::CpuidEntry; +use propolis_client::types::{ + CpuidEntry, InstanceSpecStatus, VersionedInstanceSpec, +}; use tracing::info; fn cpuid_entry( @@ -34,8 +36,11 @@ async fn cpuid_instance_spec_round_trip_test(ctx: &Framework) { vm.launch().await?; let spec_get_response = vm.get_spec().await?; - let propolis_client::types::VersionedInstanceSpec::V0(spec) = - spec_get_response.spec; + let InstanceSpecStatus::Present(VersionedInstanceSpec::V0(spec)) = + spec_get_response.spec + else { + panic!("instance spec should be present for a running VM"); + }; let cpuid = spec.board.cpuid.expect("board should have explicit CPUID"); assert_eq!(cpuid.entries.len(), entries.len()); diff --git a/phd-tests/tests/src/smoke.rs b/phd-tests/tests/src/smoke.rs index 592145b60..b3a14d3d4 100644 --- a/phd-tests/tests/src/smoke.rs +++ b/phd-tests/tests/src/smoke.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use phd_testcase::*; +use propolis_client::types::{InstanceSpecStatus, VersionedInstanceSpec}; #[phd_testcase] async fn nproc_test(ctx: &Framework) { @@ -46,8 +47,11 @@ async fn instance_spec_get_test(ctx: &Framework) { vm.launch().await?; let spec_get_response = vm.get_spec().await?; - let propolis_client::types::VersionedInstanceSpec::V0(spec) = - spec_get_response.spec; + let InstanceSpecStatus::Present(spec) = spec_get_response.spec else { + panic!("launched instance should have a spec"); + }; + + let VersionedInstanceSpec::V0(spec) = spec; assert_eq!(spec.board.cpus, 4); assert_eq!(spec.board.memory_mb, 3072); } From 35dd6bcee549c12b586aba59317a49bd566040e4 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 13 Nov 2024 00:03:11 +0000 Subject: [PATCH 3/3] fix broken comment --- .../src/lib/migrate/destination.rs | 2 +- bin/propolis-server/src/lib/migrate/preamble.rs | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/bin/propolis-server/src/lib/migrate/destination.rs b/bin/propolis-server/src/lib/migrate/destination.rs index 30c928d16..9156a02b2 100644 --- a/bin/propolis-server/src/lib/migrate/destination.rs +++ b/bin/propolis-server/src/lib/migrate/destination.rs @@ -350,7 +350,7 @@ impl RonV0 { }?; info!(self.log(), "Destination read Preamble: {:?}", preamble); - let spec = match preamble.get_amended_spec(ensure_ctx.instance_spec()) { + let spec = match preamble.amend_spec(ensure_ctx.instance_spec()) { Ok(spec) => spec, Err(e) => { error!( diff --git a/bin/propolis-server/src/lib/migrate/preamble.rs b/bin/propolis-server/src/lib/migrate/preamble.rs index 082735fc5..6f0280401 100644 --- a/bin/propolis-server/src/lib/migrate/preamble.rs +++ b/bin/propolis-server/src/lib/migrate/preamble.rs @@ -22,14 +22,15 @@ impl Preamble { Preamble { instance_spec, blobs: Vec::new() } } - /// Given the instance spec in this preamble (transmitted from the migration - /// source), replace any Crucible and viona backends with their + /// Consume the spec in this Preamble and produce an instance spec suitable + /// for initializing the target VM. /// - /// This check runs on the destination. - pub fn get_amended_spec( - self, - target_spec: &Spec, - ) -> Result { + /// This routine enumerates the disks and NICs in the `target_spec` and + /// looks for disks with a Crucible backend and NICs with a viona backend. + /// Any such backends will replace the corresponding backend entries in the + /// source spec. If the target spec contains a replacement backend that is + /// not present in the source spec, this routine fails. + pub fn amend_spec(self, target_spec: &Spec) -> Result { let VersionedInstanceSpec::V0(mut source_spec) = self.instance_spec; for disk in target_spec.disks.values() { let StorageBackend::Crucible(crucible) = &disk.backend_spec else {