diff --git a/bin/propolis-server/src/lib/vcpu_tasks.rs b/bin/propolis-server/src/lib/vcpu_tasks.rs index be2d37f35..8a748cc63 100644 --- a/bin/propolis-server/src/lib/vcpu_tasks.rs +++ b/bin/propolis-server/src/lib/vcpu_tasks.rs @@ -124,7 +124,17 @@ impl VcpuTasks { Ok(exit) => exit, }; - entry = vcpu.process_vmexit(&exit).unwrap_or_else(|| { + let maybe_entry = match vcpu.process_vmexit(&exit) { + Ok(entry) => entry, + Err(e) => { + panic!( + "unhandled library error processing VM exit \ + {exit:?}: {e}" + ) + } + }; + + entry = maybe_entry.unwrap_or_else(|| { match exit.kind { VmExitKind::Inout(pio) => { debug!(&log, "Unhandled pio {:x?}", pio; diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 10fa8968e..e12128b6f 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -607,7 +607,7 @@ impl Instance { Ok(exit) => exit, }; - entry = vcpu.process_vmexit(&exit).unwrap_or_else(|| { + entry = vcpu.process_vmexit(&exit).unwrap().unwrap_or_else(|| { match exit.kind { VmExitKind::Inout(pio) => { slog::error!( diff --git a/lib/propolis/src/exits.rs b/lib/propolis/src/exits.rs index 53df50e78..3b8a7c913 100644 --- a/lib/propolis/src/exits.rs +++ b/lib/propolis/src/exits.rs @@ -13,6 +13,7 @@ use bhyve_api::{ }; /// Describes the reason for exiting execution of a vCPU. +#[derive(Debug)] pub struct VmExit { /// The instruction pointer of the guest at the time of exit. 
pub rip: u64, diff --git a/lib/propolis/src/lib.rs b/lib/propolis/src/lib.rs index d79bd69b8..0b4cc7122 100644 --- a/lib/propolis/src/lib.rs +++ b/lib/propolis/src/lib.rs @@ -28,6 +28,7 @@ pub mod intr_pins; pub mod lifecycle; pub mod migrate; pub mod mmio; +pub mod msr; pub mod pio; pub mod tasks; pub mod util; diff --git a/lib/propolis/src/msr.rs b/lib/propolis/src/msr.rs new file mode 100644 index 000000000..ae7eb94c3 --- /dev/null +++ b/lib/propolis/src/msr.rs @@ -0,0 +1,240 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A virtual "address space" for model-specific registers (MSRs). +//! +//! MSRs provide system software with a way to configure or interact with the +//! underlying CPU in an extensible way that (as the name suggests) may be +//! specific to a particular range of CPU models. Some MSRs are architectural +//! parts of the x86-64 architecture, though many really are manufacturer- and +//! model-specific. +//! +//! This module provides the [`MsrSpace`] type, which provides a virtual +//! "address space" that other Propolis components can use to register to handle +//! RDMSR/WRMSR operations. Some architectural MSRs are handled entirely in +//! bhyve; those that are handled in Propolis are dispatched to the calling +//! CPU's MSR space for possible handling. +//! +//! Individual handlers are responsible for keeping track of the values that are +//! written to the MSRs they manage (including saving and restoring them during +//! live migration). 
+ +use std::sync::{Arc, Mutex}; + +use crate::{ + util::aspace::{ASpace, Error as ASpaceError}, + vcpu::VcpuId, +}; +use thiserror::Error; + +#[usdt::provider(provider = "propolis")] +mod probes { + fn msr_read( + vcpuid: u32, + msr: u32, + val: u64, + handler_registered: u8, + handler_ok: u8, + disposition: u8, + ) { + } + + fn msr_write( + vcpuid: u32, + msr: u32, + val: u64, + handler_registered: u8, + handler_ok: u8, + disposition: u8, + ) { + } +} + +/// A handler for MSR operations. +/// +/// # Arguments +/// +/// - `VcpuId`: The ID of the vCPU that accessed an MSR. +/// - `MsrId`: The ID of the MSR being read or written. +/// - `MsrOp`: The operation to perform on the supplied MSR. +/// +/// # Return value +/// +/// - `Ok(disposition)` if the handler successfully processed the operation. The +/// enclosed [`MsrDisposition`] tells the caller if further action is +/// required. +/// - `Err` if the handler function encountered an internal error. The operation +/// is completely unhandled; in particular, if it was a [`MsrOp::Read`], no +/// output value was written. +pub type MsrFn = dyn Fn(VcpuId, MsrId, MsrOp) -> anyhow::Result<MsrDisposition> + + Send + + Sync + + 'static; + +/// The 32-bit identifier for a specific MSR. +#[derive(Clone, Copy, Debug)] +pub struct MsrId(pub u32); + +/// An operation on an MSR. +pub enum MsrOp<'a> { + /// The guest executed RDMSR. The returned value (if any) is written to the + /// supplied `u64`. + Read(&'a mut u64), + + /// The guest executed WRMSR and passed the supplied `u64` as an operand. + Write(u64), +} + +/// The disposition of an operation on an MSR. +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum MsrDisposition { + /// The MSR operation was handled and no further action is needed from the + /// caller. + Handled = 0, + + /// The caller should inject #GP into the RDMSR/WRMSR-executing vCPU. + GpException = 1, +} + +/// Errors that can arise while trying to dispatch an MSR operation.
+#[derive(Debug, Error)] +pub enum Error { + #[error("no handler registered for MSR {0:#x}")] + HandlerNotFound(u32), + + #[error("address space operation failed")] + ASpace(#[from] ASpaceError), + + #[error("error from MSR handler")] + HandlerError(anyhow::Error), +} + +/// Manages the virtual MSR "address space". +pub struct MsrSpace { + /// The mapping from MSR IDs to handler functions. + map: Mutex>>, +} + +impl MsrSpace { + /// Creates a new MSR space. + pub fn new() -> Self { + Self { map: Mutex::new(ASpace::new(0, u32::MAX as usize)) } + } + + /// Registers `func` as the handler for the range of MSRs in + /// [`start`..`len`). + pub fn register( + &self, + start: MsrId, + len: u32, + func: Arc, + ) -> Result<(), Error> { + Ok(self.map.lock().unwrap().register( + start.0 as usize, + len as usize, + func, + )?) + } + + /// Unregisters the MSR handler that passed `base` as the starting MSR when + /// it called [`Self::register`]. + pub fn unregister(&self, base: MsrId) -> Result<(), Error> { + self.map.lock().unwrap().unregister(base.0 as usize)?; + Ok(()) + } + + /// Handles the RDMSR instruction. + pub fn rdmsr( + &self, + vcpu: VcpuId, + msr: MsrId, + out: &mut u64, + ) -> Result { + let res = self.do_msr_op(vcpu, msr, MsrOp::Read(out)); + probes::msr_read!(|| { + let info = ProbeInfo::from(&res); + let (ok, disposition) = if let Some(d) = info.disposition { + (true, d as u8) + } else { + (false, 0) + }; + (vcpu.0, msr.0, *out, info.registered as u8, ok as u8, disposition) + }); + res + } + + /// Handles the WRMSR instruction. + pub fn wrmsr( + &self, + vcpu: VcpuId, + msr: MsrId, + value: u64, + ) -> Result { + let res = self.do_msr_op(vcpu, msr, MsrOp::Write(value)); + probes::msr_write!(|| { + let info = ProbeInfo::from(&res); + let (ok, disposition) = if let Some(d) = info.disposition { + (true, d as u8) + } else { + (false, 0) + }; + (vcpu.0, msr.0, value, info.registered as u8, ok as u8, disposition) + }); + res + } + + /// Handles MSR operations. 
+ fn do_msr_op( + &self, + vcpu: VcpuId, + msr: MsrId, + op: MsrOp, + ) -> Result { + let map = self.map.lock().unwrap(); + let handler = match map.region_at(msr.0 as usize) { + Ok((_start, _len, f)) => f, + Err(ASpaceError::NotFound) => { + return Err(Error::HandlerNotFound(msr.0)); + } + Err(e) => { + unreachable!("unexpected error {e} from MSR space lookup"); + } + }; + + let handler = Arc::clone(handler); + + // Allow other vCPUs to access the handler map while this operation is + // being processed. + drop(map); + handler(vcpu, msr, op).map_err(Error::HandlerError) + } +} + +/// A helper type for converting results from [`MsrSpace::do_msr_op`] into +/// USDT probe inputs. +struct ProbeInfo { + /// True if there was a handler registered for the target MSR. + registered: bool, + + /// `Some(disposition)` if the handler succeeded, `None` if it failed. + disposition: Option, +} + +impl From<&Result> for ProbeInfo { + fn from(value: &Result) -> Self { + match value { + Ok(d) => Self { registered: true, disposition: Some(*d) }, + Err(Error::HandlerNotFound(_)) => { + Self { registered: false, disposition: None } + } + Err(Error::HandlerError(_)) => { + Self { registered: true, disposition: None } + } + Err(Error::ASpace(_)) => unreachable!( + "shouldn't get an ASpaceError while handling MSR ops" + ), + } + } +} diff --git a/lib/propolis/src/vcpu.rs b/lib/propolis/src/vcpu.rs index 21fed44b5..1663f3902 100644 --- a/lib/propolis/src/vcpu.rs +++ b/lib/propolis/src/vcpu.rs @@ -5,6 +5,7 @@ //! Virtual CPU functionality. 
use std::io::Result; +use std::num::TryFromIntError; use std::sync::Arc; use crate::common::Lifecycle; @@ -12,9 +13,14 @@ use crate::cpuid; use crate::exits::*; use crate::migrate::*; use crate::mmio::MmioBus; +use crate::msr::Error as MsrError; +use crate::msr::MsrDisposition; +use crate::msr::MsrId; +use crate::msr::MsrSpace; use crate::pio::PioBus; use crate::tasks; use crate::vmm::VmmHdl; +use anyhow::Context; use cpuid_utils::{CpuidMapConversionError, CpuidSet}; use migrate::VcpuReadWrite; use thiserror::Error; @@ -31,6 +37,18 @@ mod probes { #[cfg(not(feature = "omicron-build"))] pub const MAXCPU: usize = bhyve_api::VM_MAXCPU; +/// The ID of a specific vCPU. +#[derive(Clone, Copy, Debug)] +pub struct VcpuId(pub u32); + +impl TryFrom for VcpuId { + type Error = TryFromIntError; + + fn try_from(value: i32) -> std::result::Result { + Ok(Self(u32::try_from(value)?)) + } +} + // Helios (stlouis) is built with an expanded limit of 64 #[cfg(feature = "omicron-build")] pub const MAXCPU: usize = 64; @@ -53,6 +71,7 @@ pub struct Vcpu { pub id: i32, pub bus_mmio: Arc, pub bus_pio: Arc, + msr: Arc, } impl Vcpu { @@ -62,8 +81,9 @@ impl Vcpu { id: i32, bus_mmio: Arc, bus_pio: Arc, + msr: Arc, ) -> Arc { - Arc::new(Self { hdl, id, bus_mmio, bus_pio }) + Arc::new(Self { hdl, id, bus_mmio, bus_pio, msr }) } /// ID of the virtual CPU. @@ -392,10 +412,33 @@ impl Vcpu { unsafe { self.hdl.ioctl(bhyve_api::VM_INJECT_NMI, &mut vm_nmi) } } + /// Send a general protection fault (#GP) to the vcpu. + pub fn inject_gp(&self) -> Result<()> { + let mut vm_excp = bhyve_api::vm_exception { + cpuid: self.cpuid(), + vector: i32::from(bits::IDT_GP), + error_code: 0, + error_code_valid: 0, + restart_instruction: 1, + }; + unsafe { self.hdl.ioctl(bhyve_api::VM_INJECT_EXCEPTION, &mut vm_excp) } + } + /// Process [`VmExit`] in the context of this vCPU, emitting a [`VmEntry`] /// if the parameters of the exit were such that they could be handled. 
- pub fn process_vmexit(&self, exit: &VmExit) -> Option { - match exit.kind { + /// + /// # Return value + /// + /// - `Ok(Some(entry))` if the exit was successfully handled. The payload + /// describes the parameters the caller should pass back to bhyve when + /// re-entering the guest. + /// - `Ok(None)` if the exit was not handled at this layer. + /// - `Err` if an internal error occurred while trying to handle the exit. + pub fn process_vmexit( + &self, + exit: &VmExit, + ) -> anyhow::Result> { + let entry = match exit.kind { VmExitKind::Bogus => Some(VmEntry::Run), VmExitKind::Inout(io) => match io { InoutReq::Out(io, val) => self @@ -432,9 +475,58 @@ impl Vcpu { }) .ok(), }, - VmExitKind::Rdmsr(_) | VmExitKind::Wrmsr(_, _) => { - // Leave it to the caller to emulate MSRs unhandled by the kernel - None + VmExitKind::Rdmsr(msr) => { + let mut out = 0u64; + match self.msr.rdmsr( + self.id.try_into().unwrap(), + MsrId(msr), + &mut out, + ) { + Ok(MsrDisposition::Handled) => { + self.set_reg( + bhyve_api::vm_reg_name::VM_REG_GUEST_RAX, + u64::from(out as u32), + ) + .unwrap(); + self.set_reg( + bhyve_api::vm_reg_name::VM_REG_GUEST_RDX, + out >> 32, + ) + .unwrap(); + Some(VmEntry::Run) + } + Ok(MsrDisposition::GpException) => { + self.inject_gp().unwrap(); + Some(VmEntry::Run) + } + Err(MsrError::HandlerNotFound(_)) => None, + Err(e) => { + return Err(e).with_context(|| { + format!("handling RDMSR for MSR {msr:#x}") + }) + } + } + } + VmExitKind::Wrmsr(msr, value) => { + match self.msr.wrmsr( + self.id.try_into().unwrap(), + MsrId(msr), + value, + ) { + Ok(MsrDisposition::Handled) => Some(VmEntry::Run), + Ok(MsrDisposition::GpException) => { + self.inject_gp().unwrap(); + Some(VmEntry::Run) + } + Err(MsrError::HandlerNotFound(_)) => None, + Err(e) => { + return Err(e).with_context(|| { + format!( + "handling WRMSR for MSR {msr:#x}, value {value:#x}" + ) + }) + } + } } VmExitKind::Debug => { // Until there is an interface to delay until a vCPU is no @@ -450,7 
+542,9 @@ impl Vcpu { | VmExitKind::VmxError(_) | VmExitKind::SvmError(_) => None, _ => None, - } + }; + + Ok(entry) } } @@ -1340,4 +1434,6 @@ pub mod migrate { mod bits { pub const MSR_DEBUGCTL: u32 = 0x1d9; pub const MSR_EFER: u32 = 0xc0000080; + + pub const IDT_GP: u8 = 0xd; } diff --git a/lib/propolis/src/vmm/machine.rs b/lib/propolis/src/vmm/machine.rs index 2177fea60..a7446df63 100644 --- a/lib/propolis/src/vmm/machine.rs +++ b/lib/propolis/src/vmm/machine.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use crate::accessors::*; use crate::mmio::MmioBus; +use crate::msr::MsrSpace; use crate::pio::PioBus; use crate::vcpu::{Vcpu, MAXCPU}; use crate::vmm::{create_vm, CreateOpts, PhysMap, VmmHdl}; @@ -31,6 +32,7 @@ pub struct Machine { pub map_physmem: PhysMap, pub bus_mmio: Arc, pub bus_pio: Arc, + pub msr: Arc, pub acc_mem: MemAccessor, pub acc_msi: MsiAccessor, @@ -118,9 +120,15 @@ impl Machine { let bus_mmio = Arc::new(MmioBus::new(MAX_PHYSMEM)); let bus_pio = Arc::new(PioBus::new()); + let msr = Arc::new(MsrSpace::new()); - let vcpus = - vec![Vcpu::new(hdl.clone(), 0, bus_mmio.clone(), bus_pio.clone())]; + let vcpus = vec![Vcpu::new( + hdl.clone(), + 0, + bus_mmio.clone(), + bus_pio.clone(), + msr.clone(), + )]; let acc_mem = MemAccessor::new(map.memctx()); let acc_msi = MsiAccessor::new(hdl.clone()); @@ -136,6 +144,7 @@ impl Machine { bus_mmio, bus_pio, + msr, destroyed: AtomicBool::new(false), }) @@ -238,6 +247,7 @@ impl Builder { let bus_mmio = Arc::new(MmioBus::new(MAX_PHYSMEM)); let bus_pio = Arc::new(PioBus::new()); + let msr = Arc::new(MsrSpace::new()); let acc_mem = MemAccessor::new(map.memctx()); let acc_msi = MsiAccessor::new(hdl.clone()); @@ -249,6 +259,7 @@ impl Builder { i32::from(id), bus_mmio.clone(), bus_pio.clone(), + msr.clone(), ) }) .collect(); @@ -264,6 +275,7 @@ impl Builder { bus_mmio, bus_pio, + msr, destroyed: AtomicBool::new(false), };