From bf85e354e961ffa9f56de484f3b034c489b5b225 Mon Sep 17 00:00:00 2001
From: Greg Colombo
Date: Tue, 17 Dec 2024 09:50:39 -0800
Subject: [PATCH] server: get migration target specs from their sources (#807)

Change how migration targets decide what VM configuration to use: instead
of having the client send a configuration as part of its request to start
migration, have the target inherit the configuration the source sends in
the live migration preamble. The benefits are that

- Clients (i.e. the control plane) no longer have to remember how they
  configured their migration sources, and
- Most migration compatibility checks are no longer needed--the target's
  configuration is compatible with the source's because it *is* the
  source's configuration.

Some source components do need to be reconfigured over a migration. These
include Crucible and viona backends: Crucible backends need new generation
numbers for each client, and viona backends may change the names of the
host VNICs to which they bind when a VM moves from host to host. To
accommodate this, make the sync phase substitute the target VM's Crucible
and viona backend configuration into the configuration received in the
preamble. This will be formalized more in subsequent changes.

The eventual goal is to change the request-migration API so that clients
don't send targets any instance spec at all. When this happens, targets
won't have a spec until the migration protocol has already begun to run.
To prepare for this:

- Create the VMM's tokio runtime just before creating VM components
  instead of creating it up front and running the entire VM initialization
  process on it. This is needed because the runtime's thread count depends
  on the number of vCPUs in the VM, and that information comes from the
  instance spec.
- Slightly adjust the `instance_spec_get` API to reflect that a spec might
  not be available if a VM is currently initializing via live migration.

Tests: cargo test, PHD.
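
As an illustration of the backend substitution described above, here is a
minimal, self-contained sketch. It is not the code in this patch:
`Component` and `substitute` are hypothetical, simplified stand-ins for the
`ComponentV0` variants and the preamble's spec-amendment routine, but the
replace-or-fail idea is the same.

    use std::collections::BTreeMap;

    // Hypothetical, simplified stand-ins for the spec component types; the
    // patch itself works with ComponentV0 variants from propolis-api-types.
    #[derive(Clone, Debug)]
    enum Component {
        CrucibleBackend { request_json: String },
        VionaBackend { vnic_name: String },
    }

    // Swap a target-supplied backend into the source's component map,
    // failing if the source has no component with that name or if the
    // component there is of a different kind.
    fn substitute(
        source: &mut BTreeMap<String, Component>,
        name: &str,
        replacement: Component,
    ) -> Result<(), String> {
        let slot = source.get_mut(name).ok_or_else(|| {
            format!("replacement component {name} not in source spec")
        })?;

        if std::mem::discriminant(slot) != std::mem::discriminant(&replacement)
        {
            return Err(format!(
                "component {name} has a different kind in the source spec"
            ));
        }

        *slot = replacement;
        Ok(())
    }

    fn main() {
        // A stand-in for the source spec arriving in the migration preamble.
        let mut source = BTreeMap::from([
            (
                "disk0-backend".to_string(),
                Component::CrucibleBackend { request_json: "{}".into() },
            ),
            (
                "net0-backend".to_string(),
                Component::VionaBackend { vnic_name: "vnic_src".into() },
            ),
        ]);

        // The target swaps in its own Crucible request (carrying a new
        // generation number) and the name of the host VNIC on the new host.
        substitute(
            &mut source,
            "disk0-backend",
            Component::CrucibleBackend { request_json: "{\"gen\":2}".into() },
        )
        .unwrap();
        substitute(
            &mut source,
            "net0-backend",
            Component::VionaBackend { vnic_name: "vnic_dst".into() },
        )
        .unwrap();

        println!("{source:#?}");
    }

In the patch, this replacement happens in the preamble's amend_spec step
during the sync phase, before the target creates any VM objects.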
--- bin/propolis-cli/src/main.rs | 9 +- bin/propolis-server/src/lib/migrate/compat.rs | 938 ------------------ .../src/lib/migrate/destination.rs | 60 +- bin/propolis-server/src/lib/migrate/mod.rs | 1 - .../src/lib/migrate/preamble.rs | 79 +- bin/propolis-server/src/lib/spec/mod.rs | 11 - bin/propolis-server/src/lib/vm/ensure.rs | 293 +++--- bin/propolis-server/src/lib/vm/mod.rs | 149 +-- .../src/lib/vm/state_driver.rs | 31 +- crates/propolis-api-types/src/lib.rs | 9 +- openapi/propolis-server.json | 38 +- phd-tests/tests/src/cpuid.rs | 11 +- phd-tests/tests/src/migrate.rs | 50 - phd-tests/tests/src/smoke.rs | 8 +- 14 files changed, 451 insertions(+), 1236 deletions(-) delete mode 100644 bin/propolis-server/src/lib/migrate/compat.rs diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index ba00f0a87..152e95178 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -15,7 +15,9 @@ use anyhow::{anyhow, Context}; use clap::{Parser, Subcommand}; use futures::{future, SinkExt}; use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag}; -use propolis_client::types::{InstanceMetadata, VersionedInstanceSpec}; +use propolis_client::types::{ + InstanceMetadata, InstanceSpecStatus, VersionedInstanceSpec, +}; use slog::{o, Drain, Level, Logger}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio_tungstenite::tungstenite::{ @@ -504,7 +506,10 @@ async fn migrate_instance( anyhow!("failed to get src instance properties") })?; let src_uuid = src_instance.properties.id; - let VersionedInstanceSpec::V0(spec) = &src_instance.spec; + let InstanceSpecStatus::Present(spec) = &src_instance.spec else { + anyhow::bail!("source instance doesn't have a spec yet"); + }; + let VersionedInstanceSpec::V0(spec) = &spec; let request = InstanceEnsureRequest { properties: InstanceProperties { diff --git a/bin/propolis-server/src/lib/migrate/compat.rs b/bin/propolis-server/src/lib/migrate/compat.rs deleted file mode 100644 index a2d7fbfe8..000000000 --- a/bin/propolis-server/src/lib/migrate/compat.rs +++ /dev/null @@ -1,938 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Associated functions for the [`crate::spec::Spec`] type that determine -//! whether two specs describe migration-compatible VMs. 
- -use std::collections::HashMap; - -use crate::spec::{self, SerialPortDevice}; - -use cpuid_utils::CpuidVendor; -use propolis_api_types::instance_spec::{ - components::{ - board::Chipset, - devices::{PciPciBridge, SerialPortNumber}, - }, - PciPath, -}; -use thiserror::Error; - -trait CompatCheck { - type Error; - - fn is_compatible_with(&self, other: &Self) -> Result<(), Self::Error>; -} - -#[derive(Debug, Error)] -pub enum CompatibilityError { - #[error(transparent)] - Board(#[from] BoardIncompatibility), - - #[error(transparent)] - Pvpanic(#[from] PvpanicIncompatibility), - - #[error("collection {0} incompatible")] - Collection(String, #[source] CollectionIncompatibility), - - #[cfg(feature = "falcon")] - #[error("can't migrate instances containing softnpu devices")] - SoftNpu, -} - -#[derive(Debug, Error)] -pub enum BoardIncompatibility { - #[error("boards have different CPU counts (self: {this}, other: {other})")] - CpuCount { this: u8, other: u8 }, - - #[error( - "boards have different memory sizes (self: {this}, other: {other})" - )] - MemorySize { this: u64, other: u64 }, - - #[error( - "chipsets have different PCIe settings (self: {this}, other: {other})" - )] - PcieEnabled { this: bool, other: bool }, - - #[error(transparent)] - Cpuid(#[from] CpuidMismatch), -} - -#[derive(Debug, Error)] -pub enum CpuidMismatch { - #[error("CPUID is explicit in one spec but not the other (self: {this}, other: {other}")] - Explicitness { this: bool, other: bool }, - - #[error( - "CPUID sets have different CPU vendors (self: {this}, other: {other})" - )] - Vendor { this: CpuidVendor, other: CpuidVendor }, - - #[error(transparent)] - LeavesOrValues(#[from] cpuid_utils::CpuidSetMismatch), -} - -#[derive(Debug, Error)] -pub enum DiskIncompatibility { - #[error( - "disks have different device interfaces (self: {this}, other: {other})" - )] - Interface { this: &'static str, other: &'static str }, - - #[error("disks have different PCI paths (self: {this}, other: {other})")] - PciPath { this: PciPath, other: PciPath }, - - #[error( - "disks have different backend names (self: {this:?}, other: {other:?})" - )] - BackendName { this: String, other: String }, - - #[error( - "disks have different backend kinds (self: {this}, other: {other})" - )] - BackendKind { this: &'static str, other: &'static str }, - - #[error( - "disks have different read-only settings (self: {this}, other: {other})" - )] - ReadOnly { this: bool, other: bool }, -} - -#[derive(Debug, Error)] -pub enum NicIncompatibility { - #[error("NICs have different PCI paths (self: {this}, other: {other})")] - PciPath { this: PciPath, other: PciPath }, - - #[error( - "NICs have different backend names (self: {this}, other: {other})" - )] - BackendName { this: String, other: String }, -} - -#[derive(Debug, Error)] -pub enum SerialPortIncompatibility { - #[error("ports have different numbers (self: {this:?}, other: {other:?})")] - Number { this: SerialPortNumber, other: SerialPortNumber }, - - #[error("ports have different devices (self: {this}, other: {other})")] - Device { this: SerialPortDevice, other: SerialPortDevice }, -} - -#[derive(Debug, Error)] -pub enum BridgeIncompatibility { - #[error("bridges have different PCI paths (self: {this}, other: {other})")] - PciPath { this: PciPath, other: PciPath }, - - #[error("bridges have different downstream buses (self: {this}, other: {other})")] - DownstreamBus { this: u8, other: u8 }, -} - -#[derive(Debug, Error)] -pub enum PvpanicIncompatibility { - #[error("pvpanic presence differs (self: {this}, 
other: {other})")] - Presence { this: bool, other: bool }, - - #[error( - "pvpanic devices have different names (self: {this:?}, other: {other:?})" - )] - Name { this: String, other: String }, - - #[error( - "pvpanic devices have different ISA settings (self: {this}, other: {other})" - )] - EnableIsa { this: bool, other: bool }, -} - -#[derive(Debug, Error)] -pub enum ComponentIncompatibility { - #[error(transparent)] - Board(#[from] BoardIncompatibility), - - #[error(transparent)] - Disk(#[from] DiskIncompatibility), - - #[error(transparent)] - Nic(#[from] NicIncompatibility), - - #[error(transparent)] - SerialPort(#[from] SerialPortIncompatibility), - - #[error(transparent)] - PciPciBridge(#[from] BridgeIncompatibility), -} - -#[derive(Debug, Error)] -pub enum CollectionIncompatibility { - #[error( - "collections have different lengths (self: {this}, other: {other})" - )] - Length { this: usize, other: usize }, - - #[error("collection key {0} present in self but not other")] - KeyAbsent(String), - - #[error("component {0} incompatible")] - Component(String, #[source] ComponentIncompatibility), -} - -impl> CompatCheck - for HashMap -{ - type Error = CollectionIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), CollectionIncompatibility> { - if self.len() != other.len() { - return Err(CollectionIncompatibility::Length { - this: self.len(), - other: other.len(), - }); - } - - for (key, this_val) in self.iter() { - let other_val = other.get(key).ok_or_else(|| { - CollectionIncompatibility::KeyAbsent(key.clone()) - })?; - - this_val.is_compatible_with(other_val).map_err(|e| { - CollectionIncompatibility::Component(key.clone(), e) - })?; - } - - Ok(()) - } -} - -impl spec::Spec { - fn is_board_compatible( - &self, - other: &Self, - ) -> Result<(), BoardIncompatibility> { - self.is_chipset_compatible(other)?; - self.is_cpuid_compatible(other)?; - - let this = &self.board; - let other = &other.board; - if this.cpus != other.cpus { - Err(BoardIncompatibility::CpuCount { - this: this.cpus, - other: other.cpus, - }) - } else if this.memory_mb != other.memory_mb { - Err(BoardIncompatibility::MemorySize { - this: this.memory_mb, - other: other.memory_mb, - }) - } else { - Ok(()) - } - } - - fn is_chipset_compatible( - &self, - other: &Self, - ) -> Result<(), BoardIncompatibility> { - let Chipset::I440Fx(this) = self.board.chipset; - let Chipset::I440Fx(other) = other.board.chipset; - - if this.enable_pcie != other.enable_pcie { - Err(BoardIncompatibility::PcieEnabled { - this: this.enable_pcie, - other: other.enable_pcie, - }) - } else { - Ok(()) - } - } - - fn is_cpuid_compatible(&self, other: &Self) -> Result<(), CpuidMismatch> { - match (&self.cpuid, &other.cpuid) { - (None, None) => Ok(()), - (Some(_), None) | (None, Some(_)) => { - Err(CpuidMismatch::Explicitness { - this: self.cpuid.is_some(), - other: other.cpuid.is_some(), - }) - } - (Some(this), Some(other)) => { - if this.vendor() != other.vendor() { - return Err(CpuidMismatch::Vendor { - this: this.vendor(), - other: other.vendor(), - }); - } - - this.is_equivalent_to(other)?; - Ok(()) - } - } - } - - fn is_pvpanic_compatible( - &self, - other: &Self, - ) -> Result<(), PvpanicIncompatibility> { - match (&self.pvpanic, &other.pvpanic) { - (None, None) => Ok(()), - (Some(this), Some(other)) if this.name != other.name => { - Err(PvpanicIncompatibility::Name { - this: this.name.clone(), - other: other.name.clone(), - }) - } - (Some(this), Some(other)) - if this.spec.enable_isa != other.spec.enable_isa => - { 
- Err(PvpanicIncompatibility::EnableIsa { - this: this.spec.enable_isa, - other: other.spec.enable_isa, - }) - } - (Some(_), Some(_)) => Ok(()), - (this, other) => Err(PvpanicIncompatibility::Presence { - this: this.is_some(), - other: other.is_some(), - }), - } - } - - pub(super) fn is_migration_compatible( - &self, - other: &Self, - ) -> Result<(), CompatibilityError> { - self.is_board_compatible(other)?; - self.disks.is_compatible_with(&other.disks).map_err(|e| { - CompatibilityError::Collection("disks".to_string(), e) - })?; - - self.nics.is_compatible_with(&other.nics).map_err(|e| { - CompatibilityError::Collection("nics".to_string(), e) - })?; - - self.serial.is_compatible_with(&other.serial).map_err(|e| { - CompatibilityError::Collection("serial ports".to_string(), e) - })?; - - self.pci_pci_bridges - .is_compatible_with(&other.pci_pci_bridges) - .map_err(|e| { - CompatibilityError::Collection("PCI bridges".to_string(), e) - })?; - - self.is_pvpanic_compatible(other)?; - - #[cfg(feature = "falcon")] - if self.softnpu.has_components() || other.softnpu.has_components() { - return Err(CompatibilityError::SoftNpu); - } - - Ok(()) - } -} - -impl spec::StorageDevice { - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), DiskIncompatibility> { - if std::mem::discriminant(self) != std::mem::discriminant(other) { - Err(DiskIncompatibility::Interface { - this: self.kind(), - other: other.kind(), - }) - } else if self.pci_path() != other.pci_path() { - Err(DiskIncompatibility::PciPath { - this: self.pci_path(), - other: other.pci_path(), - }) - } else if self.backend_name() != other.backend_name() { - Err(DiskIncompatibility::BackendName { - this: self.backend_name().to_owned(), - other: other.backend_name().to_owned(), - }) - } else { - Ok(()) - } - } -} - -impl spec::StorageBackend { - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), DiskIncompatibility> { - if std::mem::discriminant(self) != std::mem::discriminant(other) { - Err(DiskIncompatibility::BackendKind { - this: self.kind(), - other: other.kind(), - }) - } else if self.read_only() != other.read_only() { - Err(DiskIncompatibility::ReadOnly { - this: self.read_only(), - other: other.read_only(), - }) - } else { - Ok(()) - } - } -} - -impl CompatCheck for spec::Disk { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - self.device_spec.is_compatible_with(&other.device_spec)?; - self.backend_spec.is_compatible_with(&other.backend_spec)?; - Ok(()) - } -} - -impl CompatCheck for spec::Nic { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - if self.device_spec.pci_path != other.device_spec.pci_path { - Err(NicIncompatibility::PciPath { - this: self.device_spec.pci_path, - other: other.device_spec.pci_path, - }) - } else if self.device_spec.backend_name - != other.device_spec.backend_name - { - Err(NicIncompatibility::BackendName { - this: self.device_spec.backend_name.clone(), - other: other.device_spec.backend_name.clone(), - }) - } else { - Ok(()) - } - .map_err(ComponentIncompatibility::Nic) - } -} - -impl CompatCheck for spec::SerialPort { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - if self.num != other.num { - Err(SerialPortIncompatibility::Number { - this: self.num, - other: other.num, - }) - } else if 
std::mem::discriminant(&self.device) - != std::mem::discriminant(&other.device) - { - Err(SerialPortIncompatibility::Device { - this: self.device, - other: other.device, - }) - } else { - Ok(()) - } - .map_err(ComponentIncompatibility::SerialPort) - } -} - -impl CompatCheck for PciPciBridge { - type Error = ComponentIncompatibility; - - fn is_compatible_with( - &self, - other: &Self, - ) -> Result<(), ComponentIncompatibility> { - if self.pci_path != other.pci_path { - Err(BridgeIncompatibility::PciPath { - this: self.pci_path, - other: other.pci_path, - }) - } else if self.downstream_bus != other.downstream_bus { - Err(BridgeIncompatibility::DownstreamBus { - this: self.downstream_bus, - other: other.downstream_bus, - }) - } else { - Ok(()) - } - .map_err(ComponentIncompatibility::PciPciBridge) - } -} - -#[cfg(test)] -mod test { - use cpuid_utils::{CpuidIdent, CpuidSet, CpuidValues}; - use propolis_api_types::instance_spec::components::{ - backends::{ - CrucibleStorageBackend, FileStorageBackend, VirtioNetworkBackend, - }, - board::I440Fx, - devices::{ - NvmeDisk, QemuPvpanic as QemuPvpanicDesc, VirtioDisk, VirtioNic, - }, - }; - use spec::{QemuPvpanic, StorageDevice}; - use uuid::Uuid; - - use super::*; - - fn new_spec() -> spec::Spec { - let mut spec = spec::Spec::default(); - spec.board.cpus = 2; - spec.board.memory_mb = 512; - spec - } - - fn file_backend() -> spec::StorageBackend { - spec::StorageBackend::File(FileStorageBackend { - path: "/tmp/file.raw".to_owned(), - readonly: false, - }) - } - - fn crucible_backend() -> spec::StorageBackend { - spec::StorageBackend::Crucible(CrucibleStorageBackend { - request_json: "{}".to_owned(), - readonly: false, - }) - } - - fn nic() -> spec::Nic { - spec::Nic { - device_spec: VirtioNic { - pci_path: PciPath::new(0, 16, 0).unwrap(), - interface_id: Uuid::new_v4(), - backend_name: "vnic".to_owned(), - }, - backend_spec: VirtioNetworkBackend { - vnic_name: "vnic0".to_owned(), - }, - } - } - - fn serial_port() -> spec::SerialPort { - spec::SerialPort { - num: SerialPortNumber::Com1, - device: SerialPortDevice::Uart, - } - } - - fn bridge() -> PciPciBridge { - PciPciBridge { - downstream_bus: 1, - pci_path: PciPath::new(0, 24, 0).unwrap(), - } - } - - #[test] - fn cpu_mismatch() { - let s1 = new_spec(); - let mut s2 = s1.clone(); - s2.board.cpus += 1; - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn memory_mismatch() { - let s1 = new_spec(); - let mut s2 = s1.clone(); - s2.board.memory_mb *= 2; - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn pcie_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - s1.board.chipset = Chipset::I440Fx(I440Fx { enable_pcie: false }); - s2.board.chipset = Chipset::I440Fx(I440Fx { enable_pcie: true }); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn pvpanic_name_mismatch() { - let mut s1 = new_spec(); - s1.pvpanic = Some(QemuPvpanic { - name: "pvpanic1".to_string(), - spec: QemuPvpanicDesc { enable_isa: true }, - }); - let mut s2 = s1.clone(); - s2.pvpanic.as_mut().unwrap().name = "pvpanic2".to_string(); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn pvpanic_enable_isa_mismatch() { - let mut s1 = new_spec(); - s1.pvpanic = Some(QemuPvpanic { - name: "pvpanic".to_string(), - spec: QemuPvpanicDesc { enable_isa: true }, - }); - let mut s2 = s1.clone(); - s2.pvpanic.as_mut().unwrap().spec.enable_isa = false; - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_disks() { - 
let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let disk = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_string(), - }), - backend_spec: crucible_backend(), - }; - - s1.disks.insert("disk".to_owned(), disk.clone()); - s2.disks.insert("disk".to_owned(), disk); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn disk_name_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - s1.disks.insert("disk1".to_owned(), d1.clone()); - s2.disks.insert("disk2".to_owned(), d1); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_length_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - s1.disks.insert("disk1".to_owned(), d1.clone()); - s2.disks.insert("disk1".to_owned(), d1.clone()); - s2.disks.insert("disk2".to_owned(), d1); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_interface_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - let mut d2 = d1.clone(); - d2.device_spec = StorageDevice::Nvme(NvmeDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_pci_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - let mut d2 = d1.clone(); - d2.device_spec = StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 5, 0).unwrap(), - backend_name: "backend".to_owned(), - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_backend_name_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: crucible_backend(), - }; - - let mut d2 = d1.clone(); - d2.device_spec = StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "other_backend".to_owned(), - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_backend_kind_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: file_backend(), - }; - - let mut d2 = d1.clone(); - 
d2.backend_spec = crucible_backend(); - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn disk_backend_readonly_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let d1 = spec::Disk { - device_spec: StorageDevice::Virtio(VirtioDisk { - pci_path: PciPath::new(0, 4, 0).unwrap(), - backend_name: "backend".to_owned(), - }), - backend_spec: file_backend(), - }; - - let mut d2 = d1.clone(); - d2.backend_spec = spec::StorageBackend::File(FileStorageBackend { - path: "/tmp/file.raw".to_owned(), - readonly: true, - }); - - s1.disks.insert("disk".to_owned(), d1); - s2.disks.insert("disk".to_owned(), d2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_nics() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let nic = nic(); - s1.nics.insert("nic".to_owned(), nic.clone()); - s2.nics.insert("nic".to_owned(), nic); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn nic_pci_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let n1 = nic(); - let mut n2 = n1.clone(); - n2.device_spec.pci_path = PciPath::new(0, 24, 0).unwrap(); - s1.nics.insert("nic".to_owned(), n1); - s2.nics.insert("nic".to_owned(), n2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn nic_backend_name_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let n1 = nic(); - let mut n2 = n1.clone(); - "other_backend".clone_into(&mut n2.device_spec.backend_name); - s1.nics.insert("nic".to_owned(), n1); - s2.nics.insert("nic".to_owned(), n2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_serial_ports() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let serial = serial_port(); - s1.serial.insert("com1".to_owned(), serial.clone()); - s2.serial.insert("com1".to_owned(), serial); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn serial_port_number_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let serial1 = serial_port(); - let mut serial2 = serial1.clone(); - serial2.num = SerialPortNumber::Com2; - s1.serial.insert("com1".to_owned(), serial1); - s2.serial.insert("com1".to_owned(), serial2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_bridges() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let bridge = bridge(); - s1.pci_pci_bridges.insert("bridge1".to_owned(), bridge); - s2.pci_pci_bridges.insert("bridge1".to_owned(), bridge); - assert!(s1.is_migration_compatible(&s2).is_ok()); - } - - #[test] - fn bridge_downstream_bus_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let b1 = bridge(); - let mut b2 = b1; - b2.downstream_bus += 1; - s1.pci_pci_bridges.insert("bridge1".to_owned(), b1); - s2.pci_pci_bridges.insert("bridge1".to_owned(), b2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn bridge_pci_path_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let b1 = bridge(); - let mut b2 = b1; - b2.pci_path = PciPath::new(0, 30, 0).unwrap(); - s1.pci_pci_bridges.insert("bridge1".to_owned(), b1); - s2.pci_pci_bridges.insert("bridge1".to_owned(), b2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn compatible_cpuid() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Intel); - let mut set2 = 
CpuidSet::new(CpuidVendor::Intel); - - s1.cpuid = Some(set1.clone()); - s2.cpuid = Some(set2.clone()); - s1.is_migration_compatible(&s2).unwrap(); - - set1.insert(CpuidIdent::leaf(0x1337), CpuidValues::default()).unwrap(); - set2.insert(CpuidIdent::leaf(0x1337), CpuidValues::default()).unwrap(); - - s1.cpuid = Some(set1.clone()); - s2.cpuid = Some(set2.clone()); - s1.is_migration_compatible(&s2).unwrap(); - - let values = CpuidValues { eax: 5, ebx: 6, ecx: 7, edx: 8 }; - set1.insert(CpuidIdent::subleaf(3, 4), values).unwrap(); - set2.insert(CpuidIdent::subleaf(3, 4), values).unwrap(); - s1.is_migration_compatible(&s2).unwrap(); - } - - #[test] - fn cpuid_explicitness_mismatch() { - let mut s1 = new_spec(); - let s2 = s1.clone(); - s1.cpuid = Some(CpuidSet::new(CpuidVendor::Intel)); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_vendor_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - s1.cpuid = Some(CpuidSet::new(CpuidVendor::Intel)); - s2.cpuid = Some(CpuidSet::new(CpuidVendor::Amd)); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_leaf_set_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Amd); - let mut set2 = CpuidSet::new(CpuidVendor::Amd); - - // Give the first set an entry the second set doesn't have. - set1.insert(CpuidIdent::leaf(0), CpuidValues::default()).unwrap(); - set1.insert(CpuidIdent::leaf(1), CpuidValues::default()).unwrap(); - set2.insert(CpuidIdent::leaf(0), CpuidValues::default()).unwrap(); - - s1.cpuid = Some(set1); - s2.cpuid = Some(set2.clone()); - assert!(s1.is_migration_compatible(&s2).is_err()); - - // Make the sets have the same number of entries, but with a difference - // in which entries they have. - set2.insert(CpuidIdent::leaf(3), CpuidValues::default()).unwrap(); - s2.cpuid = Some(set2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_leaf_value_mismatch() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Amd); - let mut set2 = CpuidSet::new(CpuidVendor::Amd); - - let v1 = CpuidValues { eax: 4, ebx: 5, ecx: 6, edx: 7 }; - let v2 = CpuidValues { eax: 100, ebx: 200, ecx: 300, edx: 400 }; - set1.insert(CpuidIdent::leaf(0), v1).unwrap(); - set2.insert(CpuidIdent::leaf(0), v2).unwrap(); - s1.cpuid = Some(set1); - s2.cpuid = Some(set2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } - - #[test] - fn cpuid_leaf_subleaf_conflict() { - let mut s1 = new_spec(); - let mut s2 = s1.clone(); - let mut set1 = CpuidSet::new(CpuidVendor::Amd); - let mut set2 = CpuidSet::new(CpuidVendor::Amd); - - // Check that leaf 0 with no subleaf is not compatible with leaf 0 and a - // subleaf of 0. These are semantically different: the former matches - // leaf 0 with any subleaf value, while the latter technically matches - // only leaf 0 and subleaf 0 (with leaf-specific behavior if a different - // subleaf is specified). 
- set1.insert(CpuidIdent::leaf(0), CpuidValues::default()).unwrap(); - set2.insert(CpuidIdent::subleaf(0, 0), CpuidValues::default()).unwrap(); - s1.cpuid = Some(set1); - s2.cpuid = Some(set2); - assert!(s1.is_migration_compatible(&s2).is_err()); - } -} diff --git a/bin/propolis-server/src/lib/migrate/destination.rs b/bin/propolis-server/src/lib/migrate/destination.rs index eb5982f15..9156a02b2 100644 --- a/bin/propolis-server/src/lib/migrate/destination.rs +++ b/bin/propolis-server/src/lib/migrate/destination.rs @@ -27,6 +27,7 @@ use crate::migrate::probes; use crate::migrate::{ Device, MigrateError, MigratePhase, MigrateRole, MigrationState, PageIter, }; +use crate::spec::Spec; use crate::vm::ensure::{VmEnsureActive, VmEnsureNotStarted}; use crate::vm::state_publisher::{ ExternalStateUpdate, MigrationStateUpdate, StatePublisher, @@ -171,21 +172,24 @@ impl DestinationProtocol for RonV0 { let result = async { // Run the sync phase to ensure that the source's instance spec is // compatible with the spec supplied in the ensure parameters. - if let Err(e) = self.run_sync_phases(&mut ensure).await { - self.update_state( - ensure.state_publisher(), - MigrationState::Error, - ); - let e = ensure.fail(e.into()).await; - return Err(e - .downcast::() - .expect("original error was a MigrateError")); - } + let spec = match self.run_sync_phases(&mut ensure).await { + Ok(spec) => spec, + Err(e) => { + self.update_state( + ensure.state_publisher(), + MigrationState::Error, + ); + let e = ensure.fail(e.into()).await; + return Err(e + .downcast::() + .expect("original error was a MigrateError")); + } + }; // The sync phase succeeded, so it's OK to go ahead with creating // the objects in the target's instance spec. let mut objects_created = - ensure.create_objects().await.map_err(|e| { + ensure.create_objects_from_spec(spec).await.map_err(|e| { MigrateError::TargetInstanceInitializationFailed( e.to_string(), ) @@ -260,14 +264,14 @@ impl RonV0 { async fn run_sync_phases( &mut self, ensure_ctx: &mut VmEnsureNotStarted<'_>, - ) -> Result<(), MigrateError> { + ) -> Result { let step = MigratePhase::MigrateSync; probes::migrate_phase_begin!(|| { step.to_string() }); - self.sync(ensure_ctx).await?; + let result = self.sync(ensure_ctx).await; probes::migrate_phase_end!(|| { step.to_string() }); - Ok(()) + result } async fn run_import_phases( @@ -330,7 +334,7 @@ impl RonV0 { async fn sync( &mut self, ensure_ctx: &mut VmEnsureNotStarted<'_>, - ) -> Result<(), MigrateError> { + ) -> Result { self.update_state(ensure_ctx.state_publisher(), MigrationState::Sync); let preamble: Preamble = match self.read_msg().await? 
{ codec::Message::Serialized(s) => { @@ -346,18 +350,22 @@ impl RonV0 { }?; info!(self.log(), "Destination read Preamble: {:?}", preamble); - if let Err(e) = - preamble.check_compatibility(&ensure_ctx.instance_spec().clone()) - { - error!( - self.log(), - "source and destination instance specs incompatible"; - "error" => #%e - ); - return Err(MigrateError::InstanceSpecsIncompatible(e.to_string())); - } + let spec = match preamble.amend_spec(ensure_ctx.instance_spec()) { + Ok(spec) => spec, + Err(e) => { + error!( + self.log(), + "source and destination instance specs incompatible"; + "error" => #%e + ); + return Err(MigrateError::InstanceSpecsIncompatible( + e.to_string(), + )); + } + }; - self.send_msg(codec::Message::Okay).await + self.send_msg(codec::Message::Okay).await?; + Ok(spec) } async fn ram_push( diff --git a/bin/propolis-server/src/lib/migrate/mod.rs b/bin/propolis-server/src/lib/migrate/mod.rs index 6ccb6f8fc..033fdd0c8 100644 --- a/bin/propolis-server/src/lib/migrate/mod.rs +++ b/bin/propolis-server/src/lib/migrate/mod.rs @@ -12,7 +12,6 @@ use thiserror::Error; use tokio::io::{AsyncRead, AsyncWrite}; mod codec; -mod compat; pub mod destination; mod memx; mod preamble; diff --git a/bin/propolis-server/src/lib/migrate/preamble.rs b/bin/propolis-server/src/lib/migrate/preamble.rs index f80c4a767..6f0280401 100644 --- a/bin/propolis-server/src/lib/migrate/preamble.rs +++ b/bin/propolis-server/src/lib/migrate/preamble.rs @@ -2,10 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use propolis_api_types::instance_spec::VersionedInstanceSpec; +use propolis_api_types::instance_spec::{ + v0::ComponentV0, VersionedInstanceSpec, +}; use serde::{Deserialize, Serialize}; -use crate::spec::{self, api_spec_v0::ApiSpecError}; +use crate::spec::{api_spec_v0::ApiSpecError, Spec, StorageBackend}; use super::MigrateError; @@ -20,26 +22,69 @@ impl Preamble { Preamble { instance_spec, blobs: Vec::new() } } - /// Checks to see if the serialized spec in this preamble is compatible with - /// the supplied `other_spec`. + /// Consume the spec in this Preamble and produce an instance spec suitable + /// for initializing the target VM. /// - /// This check runs on the destination. - pub fn check_compatibility( - self, - other_spec: &spec::Spec, - ) -> Result<(), MigrateError> { - let VersionedInstanceSpec::V0(v0_spec) = self.instance_spec; - let this_spec: spec::Spec = - v0_spec.try_into().map_err(|e: ApiSpecError| { + /// This routine enumerates the disks and NICs in the `target_spec` and + /// looks for disks with a Crucible backend and NICs with a viona backend. + /// Any such backends will replace the corresponding backend entries in the + /// source spec. If the target spec contains a replacement backend that is + /// not present in the source spec, this routine fails. 
+ pub fn amend_spec(self, target_spec: &Spec) -> Result { + let VersionedInstanceSpec::V0(mut source_spec) = self.instance_spec; + for disk in target_spec.disks.values() { + let StorageBackend::Crucible(crucible) = &disk.backend_spec else { + continue; + }; + + let Some(to_amend) = + source_spec.components.get_mut(disk.device_spec.backend_name()) + else { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "replacement component {} not in source spec", + disk.device_spec.backend_name() + ))); + }; + + if !matches!(to_amend, ComponentV0::CrucibleStorageBackend(_)) { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "component {} is not a Crucible backend in the source spec", + disk.device_spec.backend_name() + ))); + } + + *to_amend = ComponentV0::CrucibleStorageBackend(crucible.clone()); + } + + for nic in target_spec.nics.values() { + let Some(to_amend) = + source_spec.components.get_mut(&nic.device_spec.backend_name) + else { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "replacement component {} not in source spec", + nic.device_spec.backend_name + ))); + }; + + if !matches!(to_amend, ComponentV0::VirtioNetworkBackend(_)) { + return Err(MigrateError::InstanceSpecsIncompatible(format!( + "component {} is not a virtio network backend \ + in the source spec", + nic.device_spec.backend_name + ))); + } + + *to_amend = + ComponentV0::VirtioNetworkBackend(nic.backend_spec.clone()); + } + + let amended_spec = + source_spec.try_into().map_err(|e: ApiSpecError| { MigrateError::PreambleParse(e.to_string()) })?; - this_spec.is_migration_compatible(other_spec).map_err(|e| { - MigrateError::InstanceSpecsIncompatible(e.to_string()) - })?; - // TODO: Compare opaque blobs. - Ok(()) + Ok(amended_spec) } } diff --git a/bin/propolis-server/src/lib/spec/mod.rs b/bin/propolis-server/src/lib/spec/mod.rs index 414453553..c87c5a431 100644 --- a/bin/propolis-server/src/lib/spec/mod.rs +++ b/bin/propolis-server/src/lib/spec/mod.rs @@ -294,17 +294,6 @@ pub struct SoftNpu { pub p9fs: Option, } -#[cfg(feature = "falcon")] -impl SoftNpu { - /// Returns `true` if this struct specifies at least one SoftNPU component. - pub fn has_components(&self) -> bool { - self.pci_port.is_some() - || self.p9_device.is_some() - || self.p9fs.is_some() - || !self.ports.is_empty() - } -} - struct ParsedDiskRequest { name: String, disk: Disk, diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index ff4fec107..f3e1cd7dd 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -41,7 +41,10 @@ use crate::{ }, spec::Spec, stats::{create_kstat_sampler, VirtualMachine}, - vm::request_queue::InstanceAutoStart, + vm::{ + request_queue::InstanceAutoStart, VMM_BASE_RT_THREADS, + VMM_MIN_RT_THREADS, + }, }; use super::{ @@ -64,7 +67,13 @@ pub(crate) struct VmEnsureNotStarted<'a> { log: &'a slog::Logger, vm: &'a Arc, ensure_request: &'a VmEnsureRequest, - ensure_options: &'a EnsureOptions, + + // VM objects are created on a separate tokio task from the one that drives + // the instance ensure state machine. This task needs its own copy of the + // ensure options. `EnsureOptions` is not `Clone`, so take a reference to an + // `Arc` wrapper around the options to have something that can be cloned and + // passed to the ensure task. 
+ ensure_options: &'a Arc, ensure_response_tx: InstanceEnsureResponseTx, state_publisher: &'a mut StatePublisher, } @@ -74,7 +83,7 @@ impl<'a> VmEnsureNotStarted<'a> { log: &'a slog::Logger, vm: &'a Arc, ensure_request: &'a VmEnsureRequest, - ensure_options: &'a EnsureOptions, + ensure_options: &'a Arc, ensure_response_tx: InstanceEnsureResponseTx, state_publisher: &'a mut StatePublisher, ) -> Self { @@ -96,10 +105,25 @@ impl<'a> VmEnsureNotStarted<'a> { self.state_publisher } + pub(crate) async fn create_objects_from_request( + self, + ) -> anyhow::Result> { + let spec = self.ensure_request.instance_spec.clone(); + self.create_objects(spec).await + } + + pub(crate) async fn create_objects_from_spec( + self, + spec: Spec, + ) -> anyhow::Result> { + self.create_objects(spec).await + } + /// Creates a set of VM objects using the instance spec stored in this /// ensure request, but does not install them as an active VM. - pub(crate) async fn create_objects( + async fn create_objects( self, + spec: Spec, ) -> anyhow::Result> { debug!(self.log, "creating VM objects"); @@ -111,7 +135,38 @@ impl<'a> VmEnsureNotStarted<'a> { }, )); - match self.initialize_vm_objects_from_spec(&input_queue).await { + // Create the runtime that will host tasks created by VMM components + // (e.g. block device runtime tasks). + let vmm_rt = tokio::runtime::Builder::new_multi_thread() + .thread_name("tokio-rt-vmm") + .worker_threads(usize::max( + VMM_MIN_RT_THREADS, + VMM_BASE_RT_THREADS + spec.board.cpus as usize, + )) + .enable_all() + .build()?; + + let log_for_init = self.log.clone(); + let properties = self.ensure_request.properties.clone(); + let options = self.ensure_options.clone(); + let queue_for_init = input_queue.clone(); + let init_result = vmm_rt + .spawn(async move { + initialize_vm_objects( + log_for_init, + spec, + properties, + options, + queue_for_init, + ) + .await + }) + .await + .map_err(|e| { + anyhow::anyhow!("failed to join VM object creation task: {e}") + })?; + + match init_result { Ok(objects) => { // N.B. Once these `VmObjects` exist, it is no longer safe to // call `vm_init_failed`. @@ -124,6 +179,7 @@ impl<'a> VmEnsureNotStarted<'a> { Ok(VmEnsureObjectsCreated { log: self.log, vm: self.vm, + vmm_rt, ensure_request: self.ensure_request, ensure_options: self.ensure_options, ensure_response_tx: self.ensure_response_tx, @@ -148,114 +204,6 @@ impl<'a> VmEnsureNotStarted<'a> { reason } - - async fn initialize_vm_objects_from_spec( - &self, - event_queue: &Arc, - ) -> anyhow::Result { - let properties = &self.ensure_request.properties; - let spec = &self.ensure_request.instance_spec; - let options = self.ensure_options; - - info!(self.log, "initializing new VM"; - "spec" => #?spec, - "properties" => #?properties, - "use_reservoir" => options.use_reservoir, - "bootrom" => %options.toml_config.bootrom.display()); - - let vmm_log = self.log.new(slog::o!("component" => "vmm")); - - // Set up the 'shell' instance into which the rest of this routine will - // add components. 
- let machine = build_instance( - &properties.vm_name(), - spec, - options.use_reservoir, - vmm_log, - )?; - - let mut init = MachineInitializer { - log: self.log.clone(), - machine: &machine, - devices: Default::default(), - block_backends: Default::default(), - crucible_backends: Default::default(), - spec, - properties, - toml_config: &options.toml_config, - producer_registry: options.oximeter_registry.clone(), - state: MachineInitializerState::default(), - kstat_sampler: initialize_kstat_sampler( - self.log, - self.instance_spec(), - options.oximeter_registry.clone(), - ), - stats_vm: VirtualMachine::new(spec.board.cpus, properties), - }; - - init.initialize_rom(options.toml_config.bootrom.as_path())?; - let chipset = init.initialize_chipset( - &(event_queue.clone() - as Arc), - )?; - - init.initialize_rtc(&chipset)?; - init.initialize_hpet(); - - let com1 = Arc::new(init.initialize_uart(&chipset)); - let ps2ctrl = init.initialize_ps2(&chipset); - init.initialize_qemu_debug_port()?; - init.initialize_qemu_pvpanic(VirtualMachine::new( - self.instance_spec().board.cpus, - properties, - ))?; - init.initialize_network_devices(&chipset).await?; - - #[cfg(not(feature = "omicron-build"))] - init.initialize_test_devices(&options.toml_config.devices); - #[cfg(feature = "omicron-build")] - info!( - self.log, - "`omicron-build` feature enabled, ignoring any test devices" - ); - - #[cfg(feature = "falcon")] - { - init.initialize_softnpu_ports(&chipset)?; - init.initialize_9pfs(&chipset); - } - - init.initialize_storage_devices(&chipset, options.nexus_client.clone()) - .await?; - - let ramfb = init.initialize_fwcfg(self.instance_spec().board.cpus)?; - init.initialize_cpus().await?; - let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( - &machine, - event_queue.clone() - as Arc, - self.log.new(slog::o!("component" => "vcpu_tasks")), - )?); - - let MachineInitializer { - devices, - block_backends, - crucible_backends, - .. - } = init; - - Ok(InputVmObjects { - instance_spec: spec.clone(), - vcpu_tasks, - machine, - devices, - block_backends, - crucible_backends, - com1, - framebuffer: Some(ramfb), - ps2ctrl, - }) - } } /// Represents an instance ensure request that has proceeded far enough to @@ -264,6 +212,7 @@ impl<'a> VmEnsureNotStarted<'a> { pub(crate) struct VmEnsureObjectsCreated<'a> { log: &'a slog::Logger, vm: &'a Arc, + vmm_rt: tokio::runtime::Runtime, ensure_request: &'a VmEnsureRequest, ensure_options: &'a EnsureOptions, ensure_response_tx: InstanceEnsureResponseTx, @@ -301,12 +250,14 @@ impl<'a> VmEnsureObjectsCreated<'a> { ) .await; + let vmm_rt_hdl = self.vmm_rt.handle().clone(); self.vm .make_active( self.log, self.input_queue.clone(), &self.vm_objects, vm_services, + self.vmm_rt, ) .await; @@ -325,6 +276,7 @@ impl<'a> VmEnsureObjectsCreated<'a> { VmEnsureActive { vm: self.vm, + vmm_rt_hdl, state_publisher: self.state_publisher, vm_objects: self.vm_objects, input_queue: self.input_queue, @@ -338,12 +290,19 @@ impl<'a> VmEnsureObjectsCreated<'a> { /// not started yet. 
pub(crate) struct VmEnsureActive<'a> { vm: &'a Arc, + vmm_rt_hdl: tokio::runtime::Handle, state_publisher: &'a mut StatePublisher, vm_objects: Arc, input_queue: Arc, kernel_vm_paused: bool, } +pub(super) struct VmEnsureActiveOutput { + pub vm_objects: Arc, + pub input_queue: Arc, + pub vmm_rt_hdl: tokio::runtime::Handle, +} + impl<'a> VmEnsureActive<'a> { pub(crate) fn vm_objects(&self) -> &Arc { &self.vm_objects @@ -373,11 +332,115 @@ impl<'a> VmEnsureActive<'a> { /// Yields the VM objects and input queue for this VM so that they can be /// used to start a state driver loop. - pub(super) fn into_inner(self) -> (Arc, Arc) { - (self.vm_objects, self.input_queue) + pub(super) fn into_inner(self) -> VmEnsureActiveOutput { + VmEnsureActiveOutput { + vm_objects: self.vm_objects, + input_queue: self.input_queue, + vmm_rt_hdl: self.vmm_rt_hdl, + } } } +async fn initialize_vm_objects( + log: slog::Logger, + spec: Spec, + properties: InstanceProperties, + options: Arc, + event_queue: Arc, +) -> anyhow::Result { + info!(log, "initializing new VM"; + "spec" => #?spec, + "properties" => #?properties, + "use_reservoir" => options.use_reservoir, + "bootrom" => %options.toml_config.bootrom.display()); + + let vmm_log = log.new(slog::o!("component" => "vmm")); + + // Set up the 'shell' instance into which the rest of this routine will + // add components. + let machine = build_instance( + &properties.vm_name(), + &spec, + options.use_reservoir, + vmm_log, + )?; + + let mut init = MachineInitializer { + log: log.clone(), + machine: &machine, + devices: Default::default(), + block_backends: Default::default(), + crucible_backends: Default::default(), + spec: &spec, + properties: &properties, + toml_config: &options.toml_config, + producer_registry: options.oximeter_registry.clone(), + state: MachineInitializerState::default(), + kstat_sampler: initialize_kstat_sampler( + &log, + &spec, + options.oximeter_registry.clone(), + ), + stats_vm: VirtualMachine::new(spec.board.cpus, &properties), + }; + + init.initialize_rom(options.toml_config.bootrom.as_path())?; + let chipset = init.initialize_chipset( + &(event_queue.clone() + as Arc), + )?; + + init.initialize_rtc(&chipset)?; + init.initialize_hpet(); + + let com1 = Arc::new(init.initialize_uart(&chipset)); + let ps2ctrl = init.initialize_ps2(&chipset); + init.initialize_qemu_debug_port()?; + init.initialize_qemu_pvpanic(VirtualMachine::new( + spec.board.cpus, + &properties, + ))?; + init.initialize_network_devices(&chipset).await?; + + #[cfg(not(feature = "omicron-build"))] + init.initialize_test_devices(&options.toml_config.devices); + #[cfg(feature = "omicron-build")] + info!(log, "`omicron-build` feature enabled, ignoring any test devices"); + + #[cfg(feature = "falcon")] + { + init.initialize_softnpu_ports(&chipset)?; + init.initialize_9pfs(&chipset); + } + + init.initialize_storage_devices(&chipset, options.nexus_client.clone()) + .await?; + + let ramfb = init.initialize_fwcfg(spec.board.cpus)?; + init.initialize_cpus().await?; + let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( + &machine, + event_queue.clone() as Arc, + log.new(slog::o!("component" => "vcpu_tasks")), + )?); + + let MachineInitializer { + devices, block_backends, crucible_backends, .. + } = init; + + Ok(InputVmObjects { + instance_spec: spec.clone(), + vcpu_tasks, + machine, + devices, + block_backends, + crucible_backends, + com1, + framebuffer: Some(ramfb), + ps2ctrl, + }) +} + /// Create an object used to sample kstats. 
fn initialize_kstat_sampler( log: &slog::Logger, diff --git a/bin/propolis-server/src/lib/vm/mod.rs b/bin/propolis-server/src/lib/vm/mod.rs index fea20119f..258c1976f 100644 --- a/bin/propolis-server/src/lib/vm/mod.rs +++ b/bin/propolis-server/src/lib/vm/mod.rs @@ -87,8 +87,8 @@ use oximeter::types::ProducerRegistry; use propolis_api_types::{ instance_spec::VersionedInstanceSpec, InstanceEnsureResponse, InstanceMigrateStatusResponse, InstanceMigrationStatus, InstanceProperties, - InstanceSpecGetResponse, InstanceState, InstanceStateMonitorResponse, - MigrationState, + InstanceSpecGetResponse, InstanceSpecStatus, InstanceState, + InstanceStateMonitorResponse, MigrationState, }; use slog::info; use state_driver::StateDriverOutput; @@ -179,9 +179,6 @@ pub(crate) enum VmError { #[error("Forbidden state change")] ForbiddenStateChange(#[from] request_queue::RequestDeniedReason), - - #[error("Failed to initialize VM's tokio runtime")] - TokioRuntimeInitializationFailed(#[source] std::io::Error), } /// The top-level VM wrapper type. @@ -207,6 +204,29 @@ struct VmInner { driver: Option>, } +/// Stores a possibly-absent instance spec with a reason for its absence. +#[derive(Clone, Debug)] +enum MaybeSpec { + Present(Spec), + + /// The spec is not known yet because the VM is initializing via live + /// migration, and the source's spec is not available yet. + WaitingForMigrationSource, +} + +impl From for InstanceSpecStatus { + fn from(value: MaybeSpec) -> Self { + match value { + MaybeSpec::WaitingForMigrationSource => { + Self::WaitingForMigrationSource + } + MaybeSpec::Present(spec) => { + Self::Present(VersionedInstanceSpec::V0(spec.into())) + } + } + } +} + /// Describes a past or future VM and its properties. struct VmDescription { /// Records the VM's last externally-visible state. @@ -215,9 +235,6 @@ struct VmDescription { /// The VM's API-level instance properties. properties: InstanceProperties, - /// The VM's last-known instance specification. - spec: Spec, - /// The runtime on which the VM's state driver is running (or on which it /// ran). tokio_rt: Option, @@ -231,17 +248,17 @@ enum VmState { /// A new state driver is attempting to initialize objects for a VM with the /// ecnlosed description. - WaitingForInit(VmDescription), + WaitingForInit { vm: VmDescription, spec: MaybeSpec }, /// The VM is active, and callers can obtain a handle to its objects. Active(active::ActiveVm), /// The previous VM is shutting down, but its objects have not been fully /// destroyed yet. - Rundown(VmDescription), + Rundown { vm: VmDescription, spec: Spec }, /// The previous VM and its objects have been cleaned up. - RundownComplete(VmDescription), + RundownComplete { vm: VmDescription, spec: MaybeSpec }, } impl std::fmt::Display for VmState { @@ -251,10 +268,10 @@ impl std::fmt::Display for VmState { "{}", match self { Self::NoVm => "NoVm", - Self::WaitingForInit(_) => "WaitingForInit", + Self::WaitingForInit { .. } => "WaitingForInit", Self::Active(_) => "Active", - Self::Rundown(_) => "Rundown", - Self::RundownComplete(_) => "RundownComplete", + Self::Rundown { .. } => "Rundown", + Self::RundownComplete { .. 
} => "RundownComplete", } ) } @@ -332,20 +349,26 @@ impl Vm { let state = vm.external_state_rx.borrow().clone(); Some(InstanceSpecGetResponse { properties: vm.properties.clone(), - spec: VersionedInstanceSpec::V0(spec.into()), + spec: InstanceSpecStatus::Present( + VersionedInstanceSpec::V0(spec.into()), + ), state: state.state, }) } - - // If the VM is not active yet, or there is only a - // previously-run-down VM, return the state saved in the state - // machine. - VmState::WaitingForInit(vm) - | VmState::Rundown(vm) - | VmState::RundownComplete(vm) => Some(InstanceSpecGetResponse { + VmState::WaitingForInit { vm, spec } + | VmState::RundownComplete { vm, spec } => { + Some(InstanceSpecGetResponse { + properties: vm.properties.clone(), + state: vm.external_state_rx.borrow().state, + spec: spec.clone().into(), + }) + } + VmState::Rundown { vm, spec } => Some(InstanceSpecGetResponse { properties: vm.properties.clone(), state: vm.external_state_rx.borrow().state, - spec: VersionedInstanceSpec::V0(vm.spec.clone().into()), + spec: InstanceSpecStatus::Present(VersionedInstanceSpec::V0( + spec.clone().into(), + )), }), } } @@ -362,9 +385,9 @@ impl Vm { match &guard.state { VmState::NoVm => None, VmState::Active(vm) => Some(vm.external_state_rx.clone()), - VmState::WaitingForInit(vm) - | VmState::Rundown(vm) - | VmState::RundownComplete(vm) => { + VmState::WaitingForInit { vm, .. } + | VmState::Rundown { vm, .. } + | VmState::RundownComplete { vm, .. } => { Some(vm.external_state_rx.clone()) } } @@ -383,12 +406,13 @@ impl Vm { state_driver_queue: Arc, objects: &Arc, services: services::VmServices, + vmm_rt: tokio::runtime::Runtime, ) { info!(self.log, "installing active VM"); let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); match old { - VmState::WaitingForInit(vm) => { + VmState::WaitingForInit { vm, .. } => { guard.state = VmState::Active(ActiveVm { log: log.clone(), state_driver_queue, @@ -396,7 +420,7 @@ impl Vm { properties: vm.properties, objects: objects.clone(), services, - tokio_rt: vm.tokio_rt.expect("WaitingForInit has runtime"), + tokio_rt: vmm_rt, }); } state => unreachable!( @@ -419,8 +443,8 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); match old { - VmState::WaitingForInit(vm) => { - guard.state = VmState::RundownComplete(vm) + VmState::WaitingForInit { vm, spec } => { + guard.state = VmState::RundownComplete { vm, spec } } state => unreachable!( "start failures should only occur before an active VM is \ @@ -451,12 +475,14 @@ impl Vm { let spec = vm.objects().lock_shared().await.instance_spec().clone(); let ActiveVm { external_state_rx, properties, tokio_rt, .. 
} = vm; - guard.state = VmState::Rundown(VmDescription { - external_state_rx, - properties, + guard.state = VmState::Rundown { + vm: VmDescription { + external_state_rx, + properties, + tokio_rt: Some(tokio_rt), + }, spec, - tokio_rt: Some(tokio_rt), - }); + }; vm.services }; @@ -475,9 +501,12 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); let rt = match old { - VmState::Rundown(mut vm) => { + VmState::Rundown { mut vm, spec } => { let rt = vm.tokio_rt.take().expect("rundown VM has a runtime"); - guard.state = VmState::RundownComplete(vm); + guard.state = VmState::RundownComplete { + vm, + spec: MaybeSpec::Present(spec), + }; rt } state => unreachable!( @@ -553,31 +582,27 @@ impl Vm { { let mut guard = self.inner.write().await; match guard.state { - VmState::WaitingForInit(_) => { - return Err(VmError::WaitingToInitialize); + VmState::WaitingForInit { .. } => { + return Err(VmError::WaitingToInitialize) + } + VmState::Active { .. } => { + return Err(VmError::AlreadyInitialized) + } + VmState::Rundown { .. } => { + return Err(VmError::RundownInProgress) } - VmState::Active(_) => return Err(VmError::AlreadyInitialized), - VmState::Rundown(_) => return Err(VmError::RundownInProgress), _ => {} }; - let thread_count = usize::max( - VMM_MIN_RT_THREADS, - VMM_BASE_RT_THREADS - + ensure_request.instance_spec.board.cpus as usize, - ); - - let tokio_rt = tokio::runtime::Builder::new_multi_thread() - .thread_name("tokio-rt-vmm") - .worker_threads(thread_count) - .enable_all() - .build() - .map_err(VmError::TokioRuntimeInitializationFailed)?; - let properties = ensure_request.properties.clone(); - let spec = ensure_request.instance_spec.clone(); + let spec = if ensure_request.migrate.is_some() { + MaybeSpec::WaitingForMigrationSource + } else { + MaybeSpec::Present(ensure_request.instance_spec.clone()) + }; + let vm_for_driver = self.clone(); - guard.driver = Some(tokio_rt.spawn(async move { + guard.driver = Some(tokio::spawn(async move { state_driver::run_state_driver( log_for_driver, vm_for_driver, @@ -589,12 +614,14 @@ impl Vm { .await })); - guard.state = VmState::WaitingForInit(VmDescription { - external_state_rx: external_rx.clone(), - properties, + guard.state = VmState::WaitingForInit { + vm: VmDescription { + external_state_rx: external_rx.clone(), + properties, + tokio_rt: None, + }, spec, - tokio_rt: Some(tokio_rt), - }); + }; } // Wait for the state driver task to dispose of this request. 
diff --git a/bin/propolis-server/src/lib/vm/state_driver.rs b/bin/propolis-server/src/lib/vm/state_driver.rs index a0298b4e4..73e1768be 100644 --- a/bin/propolis-server/src/lib/vm/state_driver.rs +++ b/bin/propolis-server/src/lib/vm/state_driver.rs @@ -27,7 +27,10 @@ use crate::{ }; use super::{ - ensure::{VmEnsureActive, VmEnsureNotStarted, VmEnsureRequest}, + ensure::{ + VmEnsureActive, VmEnsureActiveOutput, VmEnsureNotStarted, + VmEnsureRequest, + }, guest_event::{self, GuestEvent}, objects::VmObjects, request_queue::{self, ExternalRequest, InstanceAutoStart}, @@ -264,6 +267,7 @@ pub(super) async fn run_state_driver( ensure_result_tx: InstanceEnsureResponseTx, ensure_options: super::EnsureOptions, ) -> StateDriverOutput { + let ensure_options = Arc::new(ensure_options); let activated_vm = match create_and_activate_vm( &log, &vm, @@ -284,10 +288,12 @@ pub(super) async fn run_state_driver( } }; - let (objects, input_queue) = activated_vm.into_inner(); + let VmEnsureActiveOutput { vm_objects, input_queue, vmm_rt_hdl } = + activated_vm.into_inner(); + let state_driver = StateDriver { log, - objects, + objects: vm_objects, input_queue, external_state: state_publisher, paused: false, @@ -296,9 +302,18 @@ pub(super) async fn run_state_driver( // Run the VM until it exits, then set rundown on the parent VM so that no // new external callers can access its objects or services. - let output = state_driver.run(ensure_request.migrate.is_some()).await; - vm.set_rundown().await; - output + match vmm_rt_hdl + .spawn(async move { + let output = + state_driver.run(ensure_request.migrate.is_some()).await; + vm.set_rundown().await; + output + }) + .await + { + Ok(output) => output, + Err(e) => panic!("failed to join state driver task: {e}"), + } } /// Processes the supplied `ensure_request` to create a set of VM objects that @@ -309,7 +324,7 @@ async fn create_and_activate_vm<'a>( state_publisher: &'a mut StatePublisher, ensure_request: &'a VmEnsureRequest, ensure_result_tx: InstanceEnsureResponseTx, - ensure_options: &'a super::EnsureOptions, + ensure_options: &'a Arc, ) -> anyhow::Result> { let ensure = VmEnsureNotStarted::new( log, @@ -347,7 +362,7 @@ async fn create_and_activate_vm<'a>( .context("running live migration protocol")?) 
} else { let created = ensure - .create_objects() + .create_objects_from_request() .await .context("creating VM objects for new instance")?; diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 44bd7a68d..db007667c 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -165,11 +165,18 @@ pub struct InstanceGetResponse { pub instance: Instance, } +#[derive(Clone, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", content = "value")] +pub enum InstanceSpecStatus { + WaitingForMigrationSource, + Present(VersionedInstanceSpec), +} + #[derive(Clone, Deserialize, Serialize, JsonSchema)] pub struct InstanceSpecGetResponse { pub properties: InstanceProperties, pub state: InstanceState, - pub spec: VersionedInstanceSpec, + pub spec: InstanceSpecStatus, } #[derive(Clone, Deserialize, Serialize, JsonSchema)] diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 964b630c8..aba9a1c03 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1431,7 +1431,7 @@ "$ref": "#/components/schemas/InstanceProperties" }, "spec": { - "$ref": "#/components/schemas/VersionedInstanceSpec" + "$ref": "#/components/schemas/InstanceSpecStatus" }, "state": { "$ref": "#/components/schemas/InstanceState" @@ -1443,6 +1443,42 @@ "state" ] }, + "InstanceSpecStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "WaitingForMigrationSource" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "Present" + ] + }, + "value": { + "$ref": "#/components/schemas/VersionedInstanceSpec" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, "InstanceSpecV0": { "type": "object", "properties": { diff --git a/phd-tests/tests/src/cpuid.rs b/phd-tests/tests/src/cpuid.rs index 3ec773761..9d1a4e7c0 100644 --- a/phd-tests/tests/src/cpuid.rs +++ b/phd-tests/tests/src/cpuid.rs @@ -4,7 +4,9 @@ use cpuid_utils::{CpuidIdent, CpuidValues, CpuidVendor}; use phd_testcase::*; -use propolis_client::types::CpuidEntry; +use propolis_client::types::{ + CpuidEntry, InstanceSpecStatus, VersionedInstanceSpec, +}; use tracing::info; fn cpuid_entry( @@ -34,8 +36,11 @@ async fn cpuid_instance_spec_round_trip_test(ctx: &Framework) { vm.launch().await?; let spec_get_response = vm.get_spec().await?; - let propolis_client::types::VersionedInstanceSpec::V0(spec) = - spec_get_response.spec; + let InstanceSpecStatus::Present(VersionedInstanceSpec::V0(spec)) = + spec_get_response.spec + else { + panic!("instance spec should be present for a running VM"); + }; let cpuid = spec.board.cpuid.expect("board should have explicit CPUID"); assert_eq!(cpuid.entries.len(), entries.len()); diff --git a/phd-tests/tests/src/migrate.rs b/phd-tests/tests/src/migrate.rs index 78438be79..a3506d107 100644 --- a/phd-tests/tests/src/migrate.rs +++ b/phd-tests/tests/src/migrate.rs @@ -274,56 +274,6 @@ mod running_process { } } -#[phd_testcase] -async fn incompatible_vms(ctx: &Framework) { - let mut builders = vec![ - ctx.vm_config_builder("migration_incompatible_target_1"), - ctx.vm_config_builder("migration_incompatible_target_2"), - ]; - - builders[0].cpus(8); - builders[1].memory_mib(1024); - - for (i, cfg) in builders.into_iter().enumerate() { - let mut source = ctx - .spawn_vm( - ctx.vm_config_builder(&format!( - "migration_incompatible_source_{}", - i - )) - .cpus(4) - .memory_mib(512), - None, - ) - 
.await?; - - source.launch().await?; - let mut target = ctx.spawn_vm(&cfg, None).await?; - - let migration_id = Uuid::new_v4(); - assert!(target - .migrate_from(&source, migration_id, MigrationTimeout::default()) - .await - .is_err()); - - let src_migration_state = source - .get_migration_state() - .await? - .migration_out - .expect("source should have a migration-out status") - .state; - assert_eq!(src_migration_state, MigrationState::Error); - - let target_migration_state = target - .get_migration_state() - .await? - .migration_in - .expect("target should have a migration-in status") - .state; - assert_eq!(target_migration_state, MigrationState::Error); - } -} - #[phd_testcase] async fn multiple_migrations(ctx: &Framework) { let mut vm0 = ctx.spawn_default_vm("multiple_migrations_0").await?; diff --git a/phd-tests/tests/src/smoke.rs b/phd-tests/tests/src/smoke.rs index 592145b60..b3a14d3d4 100644 --- a/phd-tests/tests/src/smoke.rs +++ b/phd-tests/tests/src/smoke.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use phd_testcase::*; +use propolis_client::types::{InstanceSpecStatus, VersionedInstanceSpec}; #[phd_testcase] async fn nproc_test(ctx: &Framework) { @@ -46,8 +47,11 @@ async fn instance_spec_get_test(ctx: &Framework) { vm.launch().await?; let spec_get_response = vm.get_spec().await?; - let propolis_client::types::VersionedInstanceSpec::V0(spec) = - spec_get_response.spec; + let InstanceSpecStatus::Present(spec) = spec_get_response.spec else { + panic!("launched instance should have a spec"); + }; + + let VersionedInstanceSpec::V0(spec) = spec; assert_eq!(spec.board.cpus, 4); assert_eq!(spec.board.memory_mb, 3072); }