From 31533c0cde0eb9eabbf2d35ba86948064d269cb3 Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Sat, 14 Dec 2024 18:15:29 -0500 Subject: [PATCH] feat(xen): update xenclient and xenplatform to the latest structure --- Cargo.lock | 8 + Cargo.toml | 2 +- crates/xen/xenclient/Cargo.toml | 2 + crates/xen/xenclient/examples/boot.rs | 65 ++- crates/xen/xenclient/examples/boot_speed.rs | 66 +++ crates/xen/xenclient/src/config.rs | 185 +++++++ crates/xen/xenclient/src/devalloc.rs | 79 +++ crates/xen/xenclient/src/devstate.rs | 131 +++++ crates/xen/xenclient/src/error.rs | 8 + crates/xen/xenclient/src/lib.rs | 352 ++++++------ crates/xen/xenclient/src/tx.rs | 582 -------------------- crates/xen/xenclient/src/tx/channel.rs | 89 +++ crates/xen/xenclient/src/tx/fs9p.rs | 78 +++ crates/xen/xenclient/src/tx/mod.rs | 425 ++++++++++++++ crates/xen/xenclient/src/tx/pci.rs | 194 +++++++ crates/xen/xenclient/src/tx/vbd.rs | 131 +++++ crates/xen/xenclient/src/tx/vif.rs | 112 ++++ crates/xen/xenclient/src/util.rs | 21 + crates/xen/xenplatform/src/boot.rs | 259 ++++----- crates/xen/xenplatform/src/domain.rs | 145 +++-- crates/xen/xenplatform/src/elfloader.rs | 133 ++--- crates/xen/xenplatform/src/error.rs | 8 +- crates/xen/xenplatform/src/lib.rs | 95 ++++ crates/xen/xenplatform/src/x86pv.rs | 62 +-- 24 files changed, 2143 insertions(+), 1089 deletions(-) create mode 100644 crates/xen/xenclient/examples/boot_speed.rs create mode 100644 crates/xen/xenclient/src/config.rs create mode 100644 crates/xen/xenclient/src/devalloc.rs create mode 100644 crates/xen/xenclient/src/devstate.rs delete mode 100644 crates/xen/xenclient/src/tx.rs create mode 100644 crates/xen/xenclient/src/tx/channel.rs create mode 100644 crates/xen/xenclient/src/tx/fs9p.rs create mode 100644 crates/xen/xenclient/src/tx/mod.rs create mode 100644 crates/xen/xenclient/src/tx/pci.rs create mode 100644 crates/xen/xenclient/src/tx/vbd.rs create mode 100644 crates/xen/xenclient/src/tx/vif.rs create mode 100644 crates/xen/xenclient/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index 6ff42806..67156e3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,6 +107,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.6.0" @@ -267,6 +273,8 @@ dependencies = [ name = "krata-xenclient" version = "0.0.23" dependencies = [ + "async-trait", + "bit-vec", "env_logger", "indexmap", "krata-xencall", diff --git a/Cargo.toml b/Cargo.toml index e473f12d..97edec05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,8 @@ repository = "https://github.com/edera-dev/krata" [workspace.dependencies] async-trait = "0.1.83" +bit-vec = "0.8.0" byteorder = "1" -c2rust-bitfields = "0.19.0" elf = "0.7.4" env_logger = "0.11.5" flate2 = "1.0" diff --git a/crates/xen/xenclient/Cargo.toml b/crates/xen/xenclient/Cargo.toml index 6fd2fca1..8c255e5f 100644 --- a/crates/xen/xenclient/Cargo.toml +++ b/crates/xen/xenclient/Cargo.toml @@ -9,6 +9,8 @@ edition = "2021" resolver = "2" [dependencies] +async-trait = { workspace = true } +bit-vec = { workspace = true } indexmap = { workspace = true } log = { workspace = true } krata-xencall = { path = "../xencall", version = "^0.0.23" } diff --git a/crates/xen/xenclient/examples/boot.rs b/crates/xen/xenclient/examples/boot.rs index 9517b4de..7a1e72a3 100644 --- a/crates/xen/xenclient/examples/boot.rs +++ b/crates/xen/xenclient/examples/boot.rs @@ -1,15 +1,15 @@ +use std::sync::Arc; use std::{env, process}; use tokio::fs; use uuid::Uuid; use xenclient::error::Result; -use xenclient::{DomainConfig, XenClient}; -use xenplatform::domain::BaseDomainConfig; - -#[cfg(target_arch = "x86_64")] -type RuntimePlatform = xenplatform::x86pv::X86PvPlatform; - -#[cfg(not(target_arch = "x86_64"))] -type RuntimePlatform = xenplatform::unsupported::UnsupportedPlatform; +use xenclient::tx::channel::ChannelDeviceConfig; +use xenclient::{config::DomainConfig, XenClient}; +use xenplatform::domain::{ + KernelFormat, PlatformDomainConfig, PlatformKernelConfig, PlatformOptions, + PlatformResourcesConfig, +}; +use xenplatform::RuntimePlatformType; #[tokio::main] async fn main() -> Result<()> { @@ -22,32 +22,31 @@ async fn main() -> Result<()> { } let kernel_image_path = args.get(1).expect("argument not specified"); let initrd_path = args.get(2).expect("argument not specified"); - let client = XenClient::new(0, RuntimePlatform::new()).await?; - let config = DomainConfig { - base: BaseDomainConfig { - uuid: Uuid::new_v4(), - max_vcpus: 1, - target_vcpus: 1, - max_mem_mb: 512, - target_mem_mb: 512, - enable_iommu: true, - kernel: fs::read(&kernel_image_path).await?, - initrd: fs::read(&initrd_path).await?, + let client = XenClient::new().await?; + + let mut config = DomainConfig::new(); + config.platform(PlatformDomainConfig { + uuid: Uuid::new_v4(), + platform: RuntimePlatformType::Pv, + kernel: PlatformKernelConfig { + data: Arc::new(fs::read(&kernel_image_path).await?), + format: KernelFormat::ElfCompressed, cmdline: "earlyprintk=xen earlycon=xen console=hvc0 init=/init".to_string(), - owner_domid: 0, + initrd: Some(Arc::new(fs::read(&initrd_path).await?)), + }, + resources: PlatformResourcesConfig { + max_vcpus: 1, + assigned_vcpus: 1, + max_memory_mb: 512, + assigned_memory_mb: 512, }, - backend_domid: 0, - name: "xenclient-test".to_string(), - swap_console_backend: None, - disks: vec![], - channels: vec![], - vifs: vec![], - pcis: vec![], - filesystems: vec![], - extra_keys: vec![], - extra_rw_paths: vec![], - }; - let created = client.create(&config).await?; - println!("created domain {}", created.domid); + options: PlatformOptions { iommu: true }, + }); + config.name("xenclient-test"); + let mut channel = ChannelDeviceConfig::new(); + channel.default_console().backend_initialized(); + config.add_channel(channel); + let created = client.create(config).await?; + println!("created domain {}", created.platform.domid); Ok(()) } diff --git a/crates/xen/xenclient/examples/boot_speed.rs b/crates/xen/xenclient/examples/boot_speed.rs new file mode 100644 index 00000000..b56c7d96 --- /dev/null +++ b/crates/xen/xenclient/examples/boot_speed.rs @@ -0,0 +1,66 @@ +use std::sync::Arc; +use std::{env, process}; +use tokio::fs; +use uuid::Uuid; +use xenclient::config::{DomainConfig, DomainResult}; +use xenclient::error::Result; +use xenclient::tx::channel::ChannelDeviceConfig; +use xenclient::XenClient; +use xenplatform::domain::{ + KernelFormat, PlatformDomainConfig, PlatformKernelConfig, PlatformOptions, + PlatformResourcesConfig, +}; +use xenplatform::elfloader::ElfImageLoader; +use xenplatform::RuntimePlatformType; + +#[tokio::main] +async fn main() -> Result<()> { + env_logger::init(); + + let args: Vec = env::args().collect(); + if args.len() != 2 { + println!("usage: boot-speed "); + process::exit(1); + } + let kernel_path = args.get(1).expect("argument not specified"); + let kernel = Arc::new(fs::read(kernel_path).await?); + let kernel = ElfImageLoader::load(kernel)?.into_elf_bytes(); + let client = XenClient::new().await?; + + for i in 0..5u32 { + let start = std::time::Instant::now(); + let domain = create_domain(&client, kernel.clone(), i).await?; + let end = std::time::Instant::now(); + let duration = end - start; + println!("boot setup time: {:?}", duration); + client.destroy(domain.platform.domid).await?; + } + Ok(()) +} + +async fn create_domain(client: &XenClient, kernel: Arc>, i: u32) -> Result { + let mut config = DomainConfig::new(); + config.platform(PlatformDomainConfig { + uuid: Uuid::new_v4(), + platform: RuntimePlatformType::Pv, + kernel: PlatformKernelConfig { + data: kernel, + format: KernelFormat::ElfUncompressed, + cmdline: "earlyprintk=xen earlycon=xen console=hvc0 init=/init".to_string(), + initrd: None, + }, + resources: PlatformResourcesConfig { + max_vcpus: 1, + assigned_vcpus: 1, + max_memory_mb: 512, + assigned_memory_mb: 512, + }, + options: PlatformOptions { iommu: true }, + }); + config.name(format!("xenboot-{}", i)); + config.start(false); + let mut channel = ChannelDeviceConfig::new(); + channel.default_console().backend_initialized(); + config.add_channel(channel); + client.create(config).await +} diff --git a/crates/xen/xenclient/src/config.rs b/crates/xen/xenclient/src/config.rs new file mode 100644 index 00000000..81704bb3 --- /dev/null +++ b/crates/xen/xenclient/src/config.rs @@ -0,0 +1,185 @@ +use std::collections::HashMap; + +use xencall::XenCall; +pub use xenplatform::domain::PlatformDomainConfig; +use xenplatform::domain::PlatformDomainInfo; + +use crate::{ + error::Result, + tx::{ + channel::ChannelDeviceConfig, + fs9p::Fs9pDeviceConfig, + pci::PciRootDeviceConfig, + vbd::VbdDeviceConfig, + vif::VifDeviceConfig, + {BlockDeviceResult, DeviceResult}, + }, +}; + +pub struct DomainConfig { + platform: Option, + name: Option, + backend_domid: u32, + channels: Vec, + vifs: Vec, + vbds: Vec, + fs9ps: Vec, + pci: Option, + extra_keys: HashMap, + extra_rw_paths: Vec, + start: bool, +} + +impl Default for DomainConfig { + fn default() -> Self { + Self::new() + } +} + +impl DomainConfig { + pub fn new() -> Self { + Self { + platform: None, + name: None, + backend_domid: 0, + channels: Vec::new(), + vifs: Vec::new(), + vbds: Vec::new(), + fs9ps: Vec::new(), + pci: None, + extra_keys: HashMap::new(), + extra_rw_paths: Vec::new(), + start: true, + } + } + + pub fn platform(&mut self, platform: PlatformDomainConfig) -> &mut Self { + self.platform = Some(platform); + self + } + + pub fn get_platform(&self) -> &Option { + &self.platform + } + + pub fn name(&mut self, name: impl AsRef) -> &mut Self { + self.name = Some(name.as_ref().to_string()); + self + } + + pub fn get_name(&self) -> &Option { + &self.name + } + + pub fn backend_domid(&mut self, backend_domid: u32) -> &mut Self { + self.backend_domid = backend_domid; + self + } + + pub fn get_backend_domid(&self) -> u32 { + self.backend_domid + } + + pub fn add_channel(&mut self, channel: ChannelDeviceConfig) -> &mut Self { + self.channels.push(channel); + self + } + + pub fn get_channels(&self) -> &Vec { + &self.channels + } + + pub fn add_vif(&mut self, vif: VifDeviceConfig) -> &mut Self { + self.vifs.push(vif); + self + } + + pub fn get_vifs(&self) -> &Vec { + &self.vifs + } + + pub fn add_vbd(&mut self, vbd: VbdDeviceConfig) -> &mut Self { + self.vbds.push(vbd); + self + } + + pub fn get_vbds(&self) -> &Vec { + &self.vbds + } + + pub fn add_fs9p(&mut self, fs9p: Fs9pDeviceConfig) -> &mut Self { + self.fs9ps.push(fs9p); + self + } + + pub fn get_fs9ps(&self) -> &Vec { + &self.fs9ps + } + + pub fn pci(&mut self, pci: PciRootDeviceConfig) -> &mut Self { + self.pci = Some(pci); + self + } + + pub fn get_pci(&self) -> &Option { + &self.pci + } + + pub fn add_extra_key(&mut self, key: impl AsRef, value: impl ToString) -> &mut Self { + self.extra_keys + .insert(key.as_ref().to_string(), value.to_string()); + self + } + + pub fn get_extra_keys(&self) -> &HashMap { + &self.extra_keys + } + + pub fn add_rw_path(&mut self, path: impl AsRef) -> &mut Self { + self.extra_rw_paths.push(path.as_ref().to_string()); + self + } + + pub fn get_rw_paths(&self) -> &Vec { + &self.extra_rw_paths + } + + pub fn start(&mut self, start: bool) -> &mut Self { + self.start = start; + self + } + + pub fn get_start(&self) -> bool { + self.start + } + + pub fn done(self) -> Self { + self + } + + pub(crate) async fn prepare( + &mut self, + domid: u32, + call: &XenCall, + platform: &PlatformDomainInfo, + ) -> Result<()> { + if let Some(pci) = self.pci.as_mut() { + pci.prepare(domid, call).await?; + } + + for channel in &mut self.channels { + channel.prepare(platform).await?; + } + + Ok(()) + } +} + +pub struct DomainResult { + pub platform: PlatformDomainInfo, + pub channels: Vec, + pub vifs: Vec, + pub vbds: Vec, + pub fs9ps: Vec, + pub pci: Option, +} diff --git a/crates/xen/xenclient/src/devalloc.rs b/crates/xen/xenclient/src/devalloc.rs new file mode 100644 index 00000000..ef3ababa --- /dev/null +++ b/crates/xen/xenclient/src/devalloc.rs @@ -0,0 +1,79 @@ +use bit_vec::BitVec; + +const DEVICE_COUNT: usize = 4096; +const BYTE_COUNT: usize = DEVICE_COUNT / 8; + +pub struct DeviceIdAllocator { + states: BitVec, + cursor: u32, +} + +impl Default for DeviceIdAllocator { + fn default() -> Self { + Self::new() + } +} + +impl DeviceIdAllocator { + pub fn new() -> Self { + Self { + states: BitVec::from_elem(DEVICE_COUNT, false), + cursor: 0, + } + } + + pub fn deserialize(bytes: &[u8]) -> Option { + if bytes.len() != BYTE_COUNT + 4 { + return None; + } + + let cursor = bytes[0] as u32 + | ((bytes[1] as u32) << 8) + | ((bytes[2] as u32) << 16) + | ((bytes[3] as u32) << 24); + let slice = &bytes[4..BYTE_COUNT + 4]; + if slice.len() != BYTE_COUNT { + return None; + } + let states = BitVec::from_bytes(slice); + + Some(Self { states, cursor }) + } + + pub fn allocate(&mut self) -> Option { + let start = self.cursor; + loop { + let id = self.cursor; + let value = self.states.get(self.cursor as usize)?; + + self.cursor = (self.cursor + 1) % DEVICE_COUNT as u32; + + if !value { + self.states.set(id as usize, true); + return Some(id); + } + + if self.cursor == start { + return None; + } + } + } + + pub fn release(&mut self, id: u32) { + self.states.set(id as usize, false); + } + + pub fn count_free(&mut self) -> u32 { + self.states.count_zeros() as u32 + } + + pub fn serialize(&mut self) -> Vec { + let mut bytes = Vec::with_capacity(BYTE_COUNT + 4); + bytes.push((self.cursor & 0xff) as u8); + bytes.push(((self.cursor >> 8) & 0xff) as u8); + bytes.push(((self.cursor >> 16) & 0xff) as u8); + bytes.push(((self.cursor >> 24) & 0xff) as u8); + bytes.extend_from_slice(&self.states.to_bytes()); + bytes + } +} diff --git a/crates/xen/xenclient/src/devstate.rs b/crates/xen/xenclient/src/devstate.rs new file mode 100644 index 00000000..e085594a --- /dev/null +++ b/crates/xen/xenclient/src/devstate.rs @@ -0,0 +1,131 @@ +use std::time::Duration; + +use tokio::{ + select, + time::{sleep, timeout}, +}; +use xenstore::{XsdClient, XsdInterface}; + +use crate::error::{Error, Result}; + +pub struct DeviceLocator { + pub frontend_domid: u32, + pub backend_domid: u32, + pub frontend_type: String, + pub backend_type: String, + pub device_id: u64, +} + +impl DeviceLocator { + pub fn new( + frontend_domid: u32, + backend_domid: u32, + frontend_type: String, + backend_type: String, + device_id: u64, + ) -> Self { + DeviceLocator { + frontend_domid, + backend_domid, + frontend_type, + backend_type, + device_id, + } + } + + pub fn frontend_state_path(&self) -> String { + format!( + "/local/domain/{}/device/{}/{}/state", + self.frontend_domid, self.frontend_type, self.device_id + ) + } + + pub fn backend_state_path(&self) -> String { + format!( + "/local/domain/{}/backend/{}/{}/{}/state", + self.backend_domid, self.backend_type, self.frontend_domid, self.device_id + ) + } +} + +pub struct DeviceStateWaiter { + devices: Vec, + xsd: XsdClient, +} + +impl DeviceStateWaiter { + pub fn new(xsd: XsdClient) -> Self { + DeviceStateWaiter { + devices: vec![], + xsd, + } + } + + pub fn add_device(&mut self, device: DeviceLocator) -> &mut DeviceStateWaiter { + self.devices.push(device); + self + } + + async fn check_states(xsd: &XsdClient, state_paths: &[String], desired: u32) -> Result { + let mut ready = 0; + for state_path in state_paths { + let Some(state_text) = xsd.read_string(state_path).await? else { + return Err(Error::DevStateWaitError(format!( + "state path '{}' did not exist", + state_path + ))); + }; + + let Some(state_value) = state_text.parse::().ok() else { + return Err(Error::DevStateWaitError(format!( + "state path '{}' did not have a valid value", + state_path + ))); + }; + + if state_value > desired { + return Err(Error::DevStateWaitError(format!( + "state path '{}' had a state of {} which is greater than {}", + state_path, state_value, desired + ))); + } + + if state_value == desired { + ready += 1; + } + } + Ok(ready == state_paths.len()) + } + + async fn do_wait(self, desired: u32) -> Result<()> { + let mut watch = self.xsd.create_multi_watch().await?; + let mut state_paths = Vec::new(); + for device in self.devices { + let state_path = device.backend_state_path(); + self.xsd.bind_watch_id(watch.id, &state_path).await?; + state_paths.push(state_path); + } + + loop { + if DeviceStateWaiter::check_states(&self.xsd, &state_paths, desired).await? { + break; + } + + select! { + _update = watch.receiver.recv() => {}, + _timeout = sleep(Duration::from_millis(250)) => {}, + } + } + Ok(()) + } + + pub async fn wait(self, desired: u32, deadline: Duration) -> Result<()> { + if let Some(err) = timeout(deadline, self.do_wait(desired)).await.err() { + return Err(Error::DevStateWaitError(format!( + "took too long for devices to be ready: {}", + err + ))); + } + Ok(()) + } +} diff --git a/crates/xen/xenclient/src/error.rs b/crates/xen/xenclient/src/error.rs index 9c185000..31da636d 100644 --- a/crates/xen/xenclient/src/error.rs +++ b/crates/xen/xenclient/src/error.rs @@ -34,6 +34,8 @@ pub enum Error { RegexError(#[from] regex::Error), #[error("error: {0}")] GenericError(String), + #[error("parameter missing: {0}")] + ParameterMissing(&'static str), #[error("failed to parse int: {0}")] ParseIntError(#[from] std::num::ParseIntError), #[error("invalid pci bdf string")] @@ -42,6 +44,12 @@ pub enum Error { PciDeviceNotAssignable(PciBdf), #[error("xen platform error: {0}")] XenPlatform(#[from] xenplatform::error::Error), + #[error("invalid block index")] + InvalidBlockIdx, + #[error("device state wait error: {0}")] + DevStateWaitError(String), + #[error("device ids exhausted")] + DevIdExhausted, } pub type Result = std::result::Result; diff --git a/crates/xen/xenclient/src/lib.rs b/crates/xen/xenclient/src/lib.rs index 7954ddaa..8aa6c636 100644 --- a/crates/xen/xenclient/src/lib.rs +++ b/crates/xen/xenclient/src/lib.rs @@ -1,12 +1,11 @@ pub mod error; -use crate::error::{Error, Result}; +use config::{DomainConfig, DomainResult}; +use error::{Error, Result}; use log::{debug, trace}; -use pci::PciBdf; use tokio::time::timeout; -use tx::ClientTransaction; -use xenplatform::boot::BootSetupPlatform; -use xenplatform::domain::{BaseDomainConfig, BaseDomainManager, CreatedDomain}; +use tx::{DeviceConfig, XenTransaction}; +use xenplatform::domain::{PlatformDomainInfo, PlatformDomainManager}; use std::path::PathBuf; use std::str::FromStr; @@ -15,109 +14,26 @@ use std::time::Duration; use xencall::XenCall; use xenstore::{XsdClient, XsdInterface}; +pub mod config; +pub mod devalloc; +pub mod devstate; pub mod pci; pub mod tx; +pub mod util; #[derive(Clone)] -pub struct XenClient { +pub struct XenClient { pub store: XsdClient, pub call: XenCall, - domain_manager: Arc>, -} - -#[derive(Clone, Debug)] -pub struct BlockDeviceRef { - pub path: String, - pub major: u32, - pub minor: u32, -} - -#[derive(Clone, Debug)] -pub struct DomainDisk { - pub vdev: String, - pub block: BlockDeviceRef, - pub writable: bool, -} - -#[derive(Clone, Debug)] -pub struct DomainFilesystem { - pub path: String, - pub tag: String, -} - -#[derive(Clone, Debug)] -pub struct DomainNetworkInterface { - pub mac: String, - pub mtu: u32, - pub bridge: Option, - pub script: Option, -} - -#[derive(Clone, Debug)] -pub struct DomainChannel { - pub typ: String, - pub initialized: bool, -} - -#[derive(Clone, Debug)] -pub struct DomainEventChannel { - pub name: String, -} - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub enum DomainPciRdmReservePolicy { - Invalid, - #[default] - Strict, - Relaxed, -} - -impl DomainPciRdmReservePolicy { - pub fn to_option_str(&self) -> &str { - match self { - DomainPciRdmReservePolicy::Invalid => "-1", - DomainPciRdmReservePolicy::Strict => "0", - DomainPciRdmReservePolicy::Relaxed => "1", - } - } -} - -#[derive(Clone, Debug)] -pub struct DomainPciDevice { - pub bdf: PciBdf, - pub permissive: bool, - pub msi_translate: bool, - pub power_management: bool, - pub rdm_reserve_policy: DomainPciRdmReservePolicy, -} - -#[derive(Clone, Debug)] -pub struct DomainConfig { - pub base: BaseDomainConfig, - pub backend_domid: u32, - pub name: String, - pub disks: Vec, - pub swap_console_backend: Option, - pub channels: Vec, - pub vifs: Vec, - pub filesystems: Vec, - pub pcis: Vec, - pub extra_keys: Vec<(String, String)>, - pub extra_rw_paths: Vec, -} - -#[derive(Debug)] -pub struct CreatedChannel { - pub ring_ref: u64, - pub evtchn: u32, + domain_manager: Arc, } #[allow(clippy::too_many_arguments)] -impl XenClient

{ - pub async fn new(current_domid: u32, platform: P) -> Result> { +impl XenClient { + pub async fn new() -> Result { let store = XsdClient::open().await?; - let call: XenCall = XenCall::open(current_domid)?; - let domain_manager = BaseDomainManager::new(call.clone(), platform).await?; + let call: XenCall = XenCall::open(0)?; + let domain_manager = PlatformDomainManager::new(call.clone()).await?; Ok(XenClient { store, call, @@ -125,29 +41,47 @@ impl XenClient

{ }) } - pub async fn create(&self, config: &DomainConfig) -> Result { - let created = self.domain_manager.create(config.base.clone()).await?; - match self.init(created.domid, config, &created).await { - Ok(_) => Ok(created), + pub async fn create(&self, config: DomainConfig) -> Result { + let platform = config + .get_platform() + .as_ref() + .ok_or_else(|| Error::ParameterMissing("platform"))? + .clone(); + let platform = self.domain_manager.create(platform).await?; + match self.init(platform.domid, config, &platform).await { + Ok(result) => Ok(result), Err(err) => { // ignore since destroying a domain is best-effort when an error occurs - let _ = self.domain_manager.destroy(created.domid).await; + let _ = self.domain_manager.destroy(platform.domid).await; Err(err) } } } - pub async fn transaction(&self, domid: u32, backend_domid: u32) -> Result { - ClientTransaction::new(&self.store, domid, backend_domid).await + pub async fn transaction(&self, domid: u32, backend_domid: u32) -> Result { + XenTransaction::new(&self.store, domid, backend_domid).await } - async fn init(&self, domid: u32, config: &DomainConfig, created: &CreatedDomain) -> Result<()> { + async fn init( + &self, + domid: u32, + mut config: DomainConfig, + created: &PlatformDomainInfo, + ) -> Result { trace!("xenclient init domid={} domain={:?}", domid, created); - let transaction = self.transaction(domid, config.backend_domid).await?; - transaction - .add_domain_declaration(&config.name, &config.base, created) - .await?; - transaction.commit().await?; + let platform_config = config + .get_platform() + .as_ref() + .ok_or_else(|| Error::ParameterMissing("platform"))?; + loop { + let transaction = self.transaction(domid, config.get_backend_domid()).await?; + transaction + .add_domain_declaration(config.get_name().clone(), platform_config, created) + .await?; + if transaction.maybe_commit().await? { + break; + } + } if !self .store .introduce_domain(domid, created.store_mfn, created.store_evtchn) @@ -155,57 +89,69 @@ impl XenClient

{ { return Err(Error::IntroduceDomainFailed); } - let transaction = self.transaction(domid, config.backend_domid).await?; - transaction - .add_channel_device( - created, - 0, - &DomainChannel { - typ: config - .swap_console_backend - .clone() - .unwrap_or("xenconsoled".to_string()) - .to_string(), - initialized: true, - }, - ) - .await?; + config.prepare(domid, &self.call, created).await?; + let mut channels; + let mut vifs; + let mut vbds; + let mut fs9ps; + let mut pci_result; + loop { + let transaction = self.transaction(domid, config.get_backend_domid()).await?; + + channels = Vec::new(); + for channel in config.get_channels() { + let result = channel.add_to_transaction(&transaction).await?; + channels.push(result); + } - for (index, channel) in config.channels.iter().enumerate() { - transaction - .add_channel_device(created, index + 1, channel) - .await?; - } + vifs = Vec::new(); + for vif in config.get_vifs() { + let result = vif.add_to_transaction(&transaction).await?; + vifs.push(result); + } - for (index, disk) in config.disks.iter().enumerate() { - transaction.add_vbd_device(index, disk).await?; - } + vbds = Vec::new(); + for vbd in config.get_vbds() { + let result = vbd.add_to_transaction(&transaction).await?; + vbds.push(result); + } - for (index, filesystem) in config.filesystems.iter().enumerate() { - transaction.add_9pfs_device(index, filesystem).await?; - } + fs9ps = Vec::new(); + for fs9p in config.get_fs9ps() { + let result = fs9p.add_to_transaction(&transaction).await?; + fs9ps.push(result); + } - for (index, vif) in config.vifs.iter().enumerate() { - transaction.add_vif_device(index, vif).await?; - } + pci_result = None; + if let Some(pci) = config.get_pci().as_ref() { + pci_result = Some(pci.add_to_transaction(&transaction).await?); + } - for (index, pci) in config.pcis.iter().enumerate() { - transaction - .add_pci_device(&self.call, index, config.pcis.len(), pci) - .await?; - } + for (key, value) in config.get_extra_keys() { + transaction.write(key, value, None).await?; + } - for (key, value) in &config.extra_keys { - transaction.write_key(key, value).await?; + for rw_path in config.get_rw_paths() { + transaction.add_rw_path(rw_path).await?; + } + + if transaction.maybe_commit().await? { + break; + } } - for key in &config.extra_rw_paths { - transaction.add_rw_path(key).await?; + if config.get_start() { + self.call.unpause_domain(domid).await?; } - transaction.commit().await?; - self.call.unpause_domain(domid).await?; - Ok(()) + Ok(DomainResult { + platform: created.clone(), + channels, + vifs, + vbds, + fs9ps, + pci: pci_result, + }) } pub async fn destroy(&self, domid: u32) -> Result<()> { @@ -251,39 +197,7 @@ impl XenClient

{ } for backend in &backend_paths { - let state_path = format!("{}/state", backend); - let mut watch = self.store.create_watch(&state_path).await?; - let online_path = format!("{}/online", backend); - let tx = self.store.transaction().await?; - let state = tx.read_string(&state_path).await?.unwrap_or(String::new()); - if state.is_empty() { - break; - } - tx.write_string(&online_path, "0").await?; - if !state.is_empty() && u32::from_str(&state).unwrap_or(0) != 6 { - tx.write_string(&state_path, "5").await?; - } - self.store.bind_watch(&watch).await?; - tx.commit().await?; - - let mut count: u32 = 0; - loop { - if count >= 3 { - debug!("unable to safely destroy backend: {}", backend); - break; - } - let _ = timeout(Duration::from_secs(1), watch.receiver.recv()).await; - let state = self - .store - .read_string(&state_path) - .await? - .unwrap_or_else(|| "6".to_string()); - let state = i64::from_str(&state).unwrap_or(-1); - if state == 6 { - break; - } - count += 1; - } + self.destroy_backend(backend).await?; } let tx = self.store.transaction().await?; @@ -305,4 +219,72 @@ impl XenClient

{ tx.commit().await?; Ok(()) } + + async fn destroy_backend(&self, backend: &str) -> Result<()> { + let state_path = format!("{}/state", backend); + let mut watch = self.store.create_watch(&state_path).await?; + let online_path = format!("{}/online", backend); + let tx = self.store.transaction().await?; + let state = tx.read_string(&state_path).await?.unwrap_or(String::new()); + if state.is_empty() { + return Ok(()); + } + tx.write_string(&online_path, "0").await?; + if !state.is_empty() && u32::from_str(&state).unwrap_or(0) != 6 { + tx.write_string(&state_path, "5").await?; + } + self.store.bind_watch(&watch).await?; + tx.commit().await?; + + let mut count: u32 = 0; + loop { + if count >= 3 { + debug!("unable to safely destroy backend: {}", backend); + break; + } + let _ = timeout(Duration::from_secs(1), watch.receiver.recv()).await; + let state = self + .store + .read_string(&state_path) + .await? + .unwrap_or_else(|| "6".to_string()); + let state = i64::from_str(&state).unwrap_or(-1); + if state == 6 { + break; + } + count += 1; + } + self.store.rm(backend).await?; + Ok(()) + } + + pub async fn destroy_device( + &self, + category: &str, + domid: u32, + devid: u64, + blkid: Option, + ) -> Result<()> { + let dom_path = self.store.get_domain_path(domid).await?; + let device_path = format!("{}/device/{}/{}", dom_path, category, devid); + if let Some(backend_path) = self + .store + .read_string(format!("{}/backend", device_path).as_str()) + .await? + { + self.destroy_backend(&backend_path).await?; + } + self.destroy_backend(&device_path).await?; + loop { + let tx = self.transaction(domid, 0).await?; + tx.release_devid(devid).await?; + if let Some(blkid) = blkid { + tx.release_blkid(blkid).await?; + } + if tx.maybe_commit().await? { + break; + } + } + Ok(()) + } } diff --git a/crates/xen/xenclient/src/tx.rs b/crates/xen/xenclient/src/tx.rs deleted file mode 100644 index d73522fe..00000000 --- a/crates/xen/xenclient/src/tx.rs +++ /dev/null @@ -1,582 +0,0 @@ -use indexmap::IndexMap; -use xencall::{sys::DOMCTL_DEV_RDM_RELAXED, XenCall}; -use xenplatform::{ - domain::{BaseDomainConfig, CreatedDomain}, - sys::XEN_PAGE_SHIFT, -}; -use xenstore::{ - XsPermission, XsdClient, XsdInterface, XsdTransaction, XS_PERM_NONE, XS_PERM_READ, - XS_PERM_READ_WRITE, -}; - -use crate::{ - error::{Error, Result}, - pci::XenPciBackend, - DomainChannel, DomainDisk, DomainFilesystem, DomainNetworkInterface, DomainPciDevice, - DomainPciRdmReservePolicy, -}; - -pub struct ClientTransaction { - tx: XsdTransaction, - abort: bool, - domid: u32, - dom_path: String, - backend_domid: u32, - backend_dom_path: String, -} - -impl ClientTransaction { - pub async fn new(store: &XsdClient, domid: u32, backend_domid: u32) -> Result { - let backend_dom_path = store.get_domain_path(0).await?; - let dom_path = store.get_domain_path(domid).await?; - Ok(ClientTransaction { - tx: store.transaction().await?, - abort: true, - domid, - dom_path, - backend_domid, - backend_dom_path, - }) - } - - pub async fn add_domain_declaration( - &self, - name: impl AsRef, - base: &BaseDomainConfig, - created: &CreatedDomain, - ) -> Result<()> { - let vm_path = format!("/vm/{}", base.uuid); - let ro_perm = &[ - XsPermission { - id: 0, - perms: XS_PERM_NONE, - }, - XsPermission { - id: self.domid, - perms: XS_PERM_READ, - }, - ]; - - let no_perm = &[XsPermission { - id: 0, - perms: XS_PERM_NONE, - }]; - - let rw_perm = &[XsPermission { - id: self.domid, - perms: XS_PERM_READ_WRITE, - }]; - - self.tx.rm(&self.dom_path).await?; - self.tx.mknod(&self.dom_path, ro_perm).await?; - - self.tx.rm(&vm_path).await?; - self.tx.mknod(&vm_path, ro_perm).await?; - - self.tx.mknod(&vm_path, no_perm).await?; - self.tx - .mknod(format!("{}/device", vm_path).as_str(), no_perm) - .await?; - - self.tx - .write_string(format!("{}/vm", self.dom_path).as_str(), &vm_path) - .await?; - - self.tx - .mknod(format!("{}/cpu", self.dom_path).as_str(), ro_perm) - .await?; - self.tx - .mknod(format!("{}/memory", self.dom_path).as_str(), ro_perm) - .await?; - - self.tx - .mknod(format!("{}/control", self.dom_path).as_str(), ro_perm) - .await?; - - self.tx - .mknod( - format!("{}/control/shutdown", self.dom_path).as_str(), - rw_perm, - ) - .await?; - self.tx - .mknod( - format!("{}/control/feature-poweroff", self.dom_path).as_str(), - rw_perm, - ) - .await?; - self.tx - .mknod( - format!("{}/control/feature-reboot", self.dom_path).as_str(), - rw_perm, - ) - .await?; - self.tx - .mknod( - format!("{}/control/feature-suspend", self.dom_path).as_str(), - rw_perm, - ) - .await?; - self.tx - .mknod(format!("{}/control/sysrq", self.dom_path).as_str(), rw_perm) - .await?; - - self.tx - .mknod(format!("{}/data", self.dom_path).as_str(), rw_perm) - .await?; - self.tx - .mknod(format!("{}/drivers", self.dom_path).as_str(), rw_perm) - .await?; - self.tx - .mknod(format!("{}/feature", self.dom_path).as_str(), rw_perm) - .await?; - self.tx - .mknod(format!("{}/attr", self.dom_path).as_str(), rw_perm) - .await?; - self.tx - .mknod(format!("{}/error", self.dom_path).as_str(), rw_perm) - .await?; - - self.tx - .write_string(format!("{}/uuid", vm_path).as_str(), &base.uuid.to_string()) - .await?; - self.tx - .write_string(format!("{}/name", self.dom_path).as_str(), name.as_ref()) - .await?; - self.tx - .write_string(format!("{}/name", vm_path).as_str(), name.as_ref()) - .await?; - - self.tx - .write_string(format!("{}/image/os_type", vm_path).as_str(), "linux") - .await?; - self.tx - .write_string(format!("{}/image/cmdline", vm_path).as_str(), &base.cmdline) - .await?; - self.tx - .write_string( - format!("{}/memory/static-max", self.dom_path).as_str(), - &(base.max_mem_mb * 1024).to_string(), - ) - .await?; - self.tx - .write_string( - format!("{}/memory/target", self.dom_path).as_str(), - &(base.target_mem_mb * 1024).to_string(), - ) - .await?; - self.tx - .write_string(format!("{}/memory/videoram", self.dom_path).as_str(), "0") - .await?; - self.tx - .write_string( - format!("{}/domid", self.dom_path).as_str(), - &created.domid.to_string(), - ) - .await?; - self.tx - .write_string(format!("{}/type", self.dom_path).as_str(), "PV") - .await?; - self.tx - .write_string( - format!("{}/store/port", self.dom_path).as_str(), - &created.store_evtchn.to_string(), - ) - .await?; - self.tx - .write_string( - format!("{}/store/ring-ref", self.dom_path).as_str(), - &created.store_mfn.to_string(), - ) - .await?; - for i in 0..base.max_vcpus { - let path = format!("{}/cpu/{}", self.dom_path, i); - self.tx.mkdir(&path).await?; - self.tx.set_perms(&path, ro_perm).await?; - let path = format!("{}/cpu/{}/availability", self.dom_path, i); - self.tx - .write_string( - &path, - if i < base.target_vcpus { - "online" - } else { - "offline" - }, - ) - .await?; - self.tx.set_perms(&path, ro_perm).await?; - } - Ok(()) - } - - pub async fn write_key(&self, key: impl AsRef, value: impl AsRef) -> Result<()> { - self.tx - .write_string( - &format!("{}/{}", self.dom_path, key.as_ref()), - value.as_ref(), - ) - .await?; - Ok(()) - } - - pub async fn add_rw_path(&self, key: impl AsRef) -> Result<()> { - let rw_perm = &[XsPermission { - id: self.domid, - perms: XS_PERM_READ_WRITE, - }]; - - self.tx - .mknod(&format!("{}/{}", self.dom_path, key.as_ref()), rw_perm) - .await?; - Ok(()) - } - - pub async fn add_device( - &self, - typ: impl AsRef, - id: u64, - frontend_items: Vec<(&str, String)>, - backend_items: Vec<(&str, String)>, - ) -> Result<()> { - let console_zero = typ.as_ref() == "console" && id == 0; - - let frontend_path = if console_zero { - format!("{}/console", self.dom_path) - } else { - format!("{}/device/{}/{}", self.dom_path, typ.as_ref(), id) - }; - let backend_path = format!( - "{}/backend/{}/{}/{}", - self.backend_dom_path, - typ.as_ref(), - self.domid, - id - ); - - let mut backend_items: Vec<(&str, String)> = backend_items.clone(); - let mut frontend_items: Vec<(&str, String)> = frontend_items.clone(); - backend_items.push(("frontend", frontend_path.clone())); - frontend_items.push(("backend", backend_path.clone())); - let frontend_perms = &[ - XsPermission { - id: self.domid, - perms: XS_PERM_NONE, - }, - XsPermission { - id: self.backend_domid, - perms: XS_PERM_READ, - }, - ]; - - let backend_perms = &[ - XsPermission { - id: self.backend_domid, - perms: XS_PERM_NONE, - }, - XsPermission { - id: self.domid, - perms: XS_PERM_READ, - }, - ]; - - self.tx.mknod(&frontend_path, frontend_perms).await?; - for (p, value) in &frontend_items { - let path = format!("{}/{}", frontend_path, *p); - self.tx.write_string(&path, value).await?; - if !console_zero { - self.tx.set_perms(&path, frontend_perms).await?; - } - } - self.tx.mknod(&backend_path, backend_perms).await?; - for (p, value) in &backend_items { - let path = format!("{}/{}", backend_path, *p); - self.tx.write_string(&path, value).await?; - } - Ok(()) - } - - pub async fn add_vbd_device(&self, index: usize, disk: &DomainDisk) -> Result<()> { - let id = (202 << 8) | (index << 4) as u64; - let backend_items: Vec<(&str, String)> = vec![ - ("frontend-id", self.domid.to_string()), - ("online", "1".to_string()), - ("removable", "0".to_string()), - ("bootable", "1".to_string()), - ("state", "1".to_string()), - ("dev", disk.vdev.to_string()), - ("type", "phy".to_string()), - ("mode", if disk.writable { "w" } else { "r" }.to_string()), - ("device-type", "disk".to_string()), - ("discard-enable", "0".to_string()), - ("specification", "xen".to_string()), - ("physical-device-path", disk.block.path.to_string()), - ( - "physical-device", - format!("{:02x}:{:02x}", disk.block.major, disk.block.minor), - ), - ]; - - let frontend_items: Vec<(&str, String)> = vec![ - ("backend-id", self.backend_domid.to_string()), - ("state", "1".to_string()), - ("virtual-device", id.to_string()), - ("device-type", "disk".to_string()), - ("trusted", "1".to_string()), - ("protocol", "x86_64-abi".to_string()), - ]; - - self.add_device("vbd", id, frontend_items, backend_items) - .await?; - Ok(()) - } - - pub async fn add_vif_device(&self, index: usize, vif: &DomainNetworkInterface) -> Result<()> { - let id = 20 + index as u64; - let mut backend_items: Vec<(&str, String)> = vec![ - ("frontend-id", self.domid.to_string()), - ("online", "1".to_string()), - ("state", "1".to_string()), - ("mac", vif.mac.to_string()), - ("mtu", vif.mtu.to_string()), - ("type", "vif".to_string()), - ("handle", id.to_string()), - ]; - - if vif.bridge.is_some() { - backend_items.extend_from_slice(&[("bridge", vif.bridge.clone().unwrap())]); - } - - if vif.script.is_some() { - backend_items.extend_from_slice(&[ - ("script", vif.script.clone().unwrap()), - ("hotplug-status", "".to_string()), - ]); - } else { - backend_items.extend_from_slice(&[ - ("script", "".to_string()), - ("hotplug-status", "connected".to_string()), - ]); - } - - let frontend_items: Vec<(&str, String)> = vec![ - ("backend-id", self.backend_domid.to_string()), - ("state", "1".to_string()), - ("mac", vif.mac.to_string()), - ("trusted", "1".to_string()), - ("mtu", vif.mtu.to_string()), - ]; - - self.add_device("vif", id, frontend_items, backend_items) - .await?; - Ok(()) - } - - pub async fn add_9pfs_device(&self, index: usize, filesystem: &DomainFilesystem) -> Result<()> { - let id = 90 + index as u64; - let backend_items: Vec<(&str, String)> = vec![ - ("frontend-id", self.domid.to_string()), - ("online", "1".to_string()), - ("state", "1".to_string()), - ("path", filesystem.path.to_string()), - ("security-model", "none".to_string()), - ]; - - let frontend_items: Vec<(&str, String)> = vec![ - ("backend-id", self.backend_domid.to_string()), - ("state", "1".to_string()), - ("tag", filesystem.tag.to_string()), - ]; - - self.add_device("9pfs", id, frontend_items, backend_items) - .await?; - Ok(()) - } - - pub async fn add_channel_device( - &self, - domain: &CreatedDomain, - index: usize, - channel: &DomainChannel, - ) -> Result<()> { - let port = domain.console_evtchn; - let ring = domain.console_mfn; - - let mut backend_items = vec![ - ("frontend-id", self.domid.to_string()), - ("online", "1".to_string()), - ("protocol", "vt100".to_string()), - ]; - - let mut frontend_items = vec![ - ("backend-id", self.backend_domid.to_string()), - ("limit", "1048576".to_string()), - ("output", "pty".to_string()), - ("tty", "".to_string()), - ]; - - frontend_items.push(("type", channel.typ.clone())); - backend_items.push(("type", channel.typ.clone())); - - if index == 0 { - if channel.typ != "xenconsoled" { - frontend_items.push(("state", "1".to_string())); - } - - frontend_items - .extend_from_slice(&[("port", port.to_string()), ("ring-ref", ring.to_string())]); - } else { - frontend_items.extend_from_slice(&[ - ("state", "1".to_string()), - ("protocol", "vt100".to_string()), - ]); - } - - if channel.initialized { - backend_items.push(("state", "4".to_string())); - } else { - backend_items.push(("state", "1".to_string())); - } - - self.add_device("console", index as u64, frontend_items, backend_items) - .await?; - Ok(()) - } - - pub async fn add_pci_device( - &self, - call: &XenCall, - index: usize, - device_count: usize, - device: &DomainPciDevice, - ) -> Result<()> { - let backend = XenPciBackend::new(); - if !backend.is_assigned(&device.bdf).await? { - return Err(Error::PciDeviceNotAssignable(device.bdf)); - } - let resources = backend.read_resources(&device.bdf).await?; - for resource in resources { - if resource.is_bar_io() { - call.ioport_permission( - self.domid, - resource.start as u32, - resource.size() as u32, - true, - ) - .await?; - } else { - call.iomem_permission( - self.domid, - resource.start >> XEN_PAGE_SHIFT, - (resource.size() + (XEN_PAGE_SHIFT - 1)) >> XEN_PAGE_SHIFT, - true, - ) - .await?; - } - } - - if let Some(irq) = backend.read_irq(&device.bdf).await? { - let irq = call.map_pirq(self.domid, irq as isize, None).await?; - call.irq_permission(self.domid, irq, true).await?; - } - - backend.reset(&device.bdf).await?; - - call.assign_device( - self.domid, - device.bdf.encode(), - if device.rdm_reserve_policy == DomainPciRdmReservePolicy::Relaxed { - DOMCTL_DEV_RDM_RELAXED - } else { - 0 - }, - ) - .await?; - - if device.permissive { - backend.enable_permissive(&device.bdf).await?; - } - - let id = 60; - - if index == 0 { - let backend_items: Vec<(&str, String)> = vec![ - ("frontend-id", self.domid.to_string()), - ("online", "1".to_string()), - ("state", "1".to_string()), - ("num_devs", device_count.to_string()), - ]; - - let frontend_items: Vec<(&str, String)> = vec![ - ("backend-id", self.backend_domid.to_string()), - ("state", "1".to_string()), - ]; - - self.add_device("pci", id, frontend_items, backend_items) - .await?; - } - - let backend_path = format!( - "{}/backend/{}/{}/{}", - self.backend_dom_path, "pci", self.domid, id - ); - - self.tx - .write_string( - format!("{}/key-{}", backend_path, index), - &device.bdf.to_string(), - ) - .await?; - self.tx - .write_string( - format!("{}/dev-{}", backend_path, index), - &device.bdf.to_string(), - ) - .await?; - - if let Some(vdefn) = device.bdf.vdefn { - self.tx - .write_string( - format!("{}/vdefn-{}", backend_path, index), - &format!("{:#x}", vdefn), - ) - .await?; - } - - let mut options = IndexMap::new(); - options.insert("permissive", if device.permissive { "1" } else { "0" }); - options.insert("rdm_policy", device.rdm_reserve_policy.to_option_str()); - options.insert("msitranslate", if device.msi_translate { "1" } else { "0" }); - options.insert( - "power_mgmt", - if device.power_management { "1" } else { "0" }, - ); - let options = options - .into_iter() - .map(|(key, value)| format!("{}={}", key, value)) - .collect::>() - .join(","); - - self.tx - .write_string(format!("{}/opts-{}", backend_path, index), &options) - .await?; - Ok(()) - } - - pub async fn commit(mut self) -> Result<()> { - self.abort = false; - self.tx.commit().await?; - Ok(()) - } -} - -impl Drop for ClientTransaction { - fn drop(&mut self) { - if !self.abort { - return; - } - let tx = self.tx.clone(); - tokio::task::spawn(async move { - let _ = tx.abort().await; - }); - } -} diff --git a/crates/xen/xenclient/src/tx/channel.rs b/crates/xen/xenclient/src/tx/channel.rs new file mode 100644 index 00000000..1702a3e2 --- /dev/null +++ b/crates/xen/xenclient/src/tx/channel.rs @@ -0,0 +1,89 @@ +use xenplatform::domain::PlatformDomainInfo; + +use super::{DeviceConfig, DeviceDescription, DeviceResult, XenTransaction}; +use crate::error::{Error, Result}; + +pub struct ChannelDeviceConfig { + backend_type: String, + default_console: bool, + default_console_options: Option<(u32, u64)>, + backend_initialized: bool, +} + +impl Default for ChannelDeviceConfig { + fn default() -> Self { + Self::new() + } +} + +impl ChannelDeviceConfig { + pub fn new() -> Self { + Self { + backend_type: "console".to_string(), + default_console: false, + default_console_options: None, + backend_initialized: false, + } + } + + pub fn backend_type(&mut self, backend_type: impl AsRef) -> &mut Self { + self.backend_type = backend_type.as_ref().to_string(); + self + } + + pub fn default_console(&mut self) -> &mut Self { + self.default_console = true; + self + } + + pub fn backend_initialized(&mut self) -> &mut Self { + self.backend_initialized = true; + self + } + + pub fn done(self) -> Self { + self + } + + pub async fn prepare(&mut self, platform: &PlatformDomainInfo) -> Result<()> { + if self.default_console { + self.default_console_options = Some((platform.console_evtchn, platform.console_mfn)); + } + Ok(()) + } +} + +#[async_trait::async_trait] +impl DeviceConfig for ChannelDeviceConfig { + type Result = DeviceResult; + + async fn add_to_transaction(&self, tx: &XenTransaction) -> Result { + let id = tx.assign_next_devid().await?; + let mut device = DeviceDescription::new("console", &self.backend_type); + device + .add_backend_bool("online", true) + .add_backend_item("protocol", "vt100") + .add_backend_item("type", &self.backend_type) + .add_backend_item("state", if self.backend_initialized { 4 } else { 1 }); + + if self.default_console { + device.special_frontend_path("console"); + let (port, ring_ref) = self + .default_console_options + .as_ref() + .ok_or_else(|| Error::ParameterMissing("default_console_options"))?; + device + .add_frontend_item("port", port) + .add_frontend_item("ring-ref", ring_ref); + } + + device + .add_frontend_item("limit", 1048576) + .add_frontend_item("output", "pty") + .add_frontend_item("tty", "") + .add_frontend_item("type", &self.backend_type) + .add_frontend_item("state", 1); + tx.add_device(id, device).await?; + Ok(DeviceResult { id }) + } +} diff --git a/crates/xen/xenclient/src/tx/fs9p.rs b/crates/xen/xenclient/src/tx/fs9p.rs new file mode 100644 index 00000000..952020fd --- /dev/null +++ b/crates/xen/xenclient/src/tx/fs9p.rs @@ -0,0 +1,78 @@ +use super::{DeviceConfig, DeviceDescription, DeviceResult, XenTransaction}; +use crate::error::{Error, Result}; + +pub struct Fs9pDeviceConfig { + backend_type: String, + security_model: String, + path: Option, + tag: Option, +} + +impl Default for Fs9pDeviceConfig { + fn default() -> Self { + Self::new() + } +} + +impl Fs9pDeviceConfig { + pub fn new() -> Self { + Self { + backend_type: "9pfs".to_string(), + security_model: "none".to_string(), + path: None, + tag: None, + } + } + + pub fn backend_type(&mut self, backend_type: impl AsRef) -> &mut Self { + self.backend_type = backend_type.as_ref().to_string(); + self + } + + pub fn security_model(&mut self, security_model: impl AsRef) -> &mut Self { + self.security_model = security_model.as_ref().to_string(); + self + } + + pub fn path(&mut self, path: impl AsRef) -> &mut Self { + self.path = Some(path.as_ref().to_string()); + self + } + + pub fn tag(&mut self, tag: impl AsRef) -> &mut Self { + self.tag = Some(tag.as_ref().to_string()); + self + } + + pub fn done(self) -> Self { + self + } +} + +#[async_trait::async_trait] +impl DeviceConfig for Fs9pDeviceConfig { + type Result = DeviceResult; + + async fn add_to_transaction(&self, tx: &XenTransaction) -> Result { + let id = tx.assign_next_devid().await?; + let path = self + .path + .as_ref() + .ok_or_else(|| Error::ParameterMissing("path"))?; + let tag = self + .tag + .as_ref() + .ok_or_else(|| Error::ParameterMissing("tag"))?; + let mut device = DeviceDescription::new("9pfs", &self.backend_type); + device + .add_backend_bool("online", true) + .add_backend_item("state", 1) + .add_backend_item("path", path) + .add_backend_item("security_model", &self.security_model); + device + .add_frontend_item("state", 1) + .add_frontend_item("tag", tag); + tx.add_device(id, device).await?; + Ok(DeviceResult { id }) + } +} diff --git a/crates/xen/xenclient/src/tx/mod.rs b/crates/xen/xenclient/src/tx/mod.rs new file mode 100644 index 00000000..e2bc39f8 --- /dev/null +++ b/crates/xen/xenclient/src/tx/mod.rs @@ -0,0 +1,425 @@ +pub mod channel; +pub mod fs9p; +pub mod pci; +pub mod vbd; +pub mod vif; + +use crate::{ + devalloc::DeviceIdAllocator, + error::{Error, Result}, +}; +use std::{collections::HashMap, sync::Arc}; +use tokio::sync::Mutex; +use xenplatform::domain::{PlatformDomainConfig, PlatformDomainInfo}; +use xenstore::{ + XsPermission, XsdClient, XsdInterface, XsdTransaction, XS_PERM_NONE, XS_PERM_READ, + XS_PERM_READ_WRITE, +}; + +pub struct XenTransaction { + frontend_domid: u32, + frontend_dom_path: String, + backend_domid: u32, + backend_dom_path: String, + blkalloc: Arc>, + devalloc: Arc>, + tx: XsdTransaction, + abort: bool, +} + +impl XenTransaction { + pub async fn new(store: &XsdClient, frontend_domid: u32, backend_domid: u32) -> Result { + let frontend_dom_path = store.get_domain_path(frontend_domid).await?; + let backend_dom_path = store.get_domain_path(backend_domid).await?; + let tx = store.transaction().await?; + + let devalloc = XenTransaction::load_id_allocator(&tx, "devid", &frontend_dom_path).await?; + let blkalloc = XenTransaction::load_id_allocator(&tx, "blkid", &frontend_dom_path).await?; + + Ok(XenTransaction { + frontend_domid, + frontend_dom_path, + backend_domid, + backend_dom_path, + tx, + devalloc: Arc::new(Mutex::new(devalloc)), + blkalloc: Arc::new(Mutex::new(blkalloc)), + abort: true, + }) + } + + async fn load_id_allocator( + tx: &XsdTransaction, + allocator_type: &str, + frontend_dom_path: &str, + ) -> Result { + let state = tx + .read(format!( + "{}/{}-alloc-state", + frontend_dom_path, allocator_type + )) + .await?; + let allocator = state + .and_then(|state| DeviceIdAllocator::deserialize(&state)) + .unwrap_or_else(DeviceIdAllocator::new); + Ok(allocator) + } + + pub async fn assign_next_devid(&self) -> Result { + self.devalloc + .lock() + .await + .allocate() + .ok_or(Error::DevIdExhausted) + .map(|x| x as u64) + } + + pub async fn assign_next_blkidx(&self) -> Result { + self.blkalloc + .lock() + .await + .allocate() + .ok_or(Error::DevIdExhausted) + } + + pub async fn release_devid(&self, devid: u64) -> Result<()> { + self.devalloc.lock().await.release(devid as u32); + Ok(()) + } + + pub async fn release_blkid(&self, blkid: u32) -> Result<()> { + self.blkalloc.lock().await.release(blkid); + Ok(()) + } + + pub async fn write( + &self, + key: impl AsRef, + value: impl AsRef, + perms: Option<&[XsPermission]>, + ) -> Result<()> { + let path = format!("{}/{}", self.frontend_dom_path, key.as_ref()); + if let Some(perms) = perms { + self.tx.mknod(&path, perms).await?; + } + + // empty string is written by mknod, if perms is set we can skip it. + if perms.is_none() || perms.is_some() && !value.as_ref().is_empty() { + self.tx.write_string(path, value.as_ref()).await?; + } + Ok(()) + } + + pub async fn add_domain_declaration( + &self, + name: Option>, + platform: &PlatformDomainConfig, + created: &PlatformDomainInfo, + ) -> Result<()> { + let vm_path = format!("/vm/{}", platform.uuid); + let ro_perm = &[ + XsPermission { + id: 0, + perms: XS_PERM_NONE, + }, + XsPermission { + id: self.frontend_domid, + perms: XS_PERM_READ, + }, + ]; + + let no_perm = &[XsPermission { + id: 0, + perms: XS_PERM_NONE, + }]; + + let rw_perm = &[XsPermission { + id: self.frontend_domid, + perms: XS_PERM_READ_WRITE, + }]; + + self.tx.rm(&self.frontend_dom_path).await?; + self.tx.mknod(&self.frontend_dom_path, ro_perm).await?; + + self.tx.rm(&vm_path).await?; + self.tx.mknod(&vm_path, no_perm).await?; + self.tx + .write_string(format!("{}/uuid", vm_path), &platform.uuid.to_string()) + .await?; + + self.write("vm", &vm_path, None).await?; + self.write("cpu", "", Some(ro_perm)).await?; + self.write("memory", "", Some(ro_perm)).await?; + self.write("control", "", Some(ro_perm)).await?; + self.write("control/shutdown", "", Some(rw_perm)).await?; + self.write("control/feature-poweroff", "", Some(rw_perm)) + .await?; + self.write("control/feature-reboot", "", Some(rw_perm)) + .await?; + self.write("control/feature-suspend", "", Some(rw_perm)) + .await?; + self.write("control/sysrq", "", Some(rw_perm)).await?; + self.write("data", "", Some(rw_perm)).await?; + self.write("drivers", "", Some(rw_perm)).await?; + self.write("feature", "", Some(rw_perm)).await?; + self.write("attr", "", Some(rw_perm)).await?; + self.write("error", "", Some(rw_perm)).await?; + self.write("uuid", platform.uuid.to_string(), Some(ro_perm)) + .await?; + if let Some(name) = name { + self.write("name", name.as_ref(), Some(ro_perm)).await?; + } + self.write( + "memory/static-max", + (platform.resources.max_memory_mb * 1024).to_string(), + None, + ) + .await?; + self.write( + "memory/target", + (platform.resources.assigned_memory_mb * 1024).to_string(), + None, + ) + .await?; + self.write("memory/videoram", "0", None).await?; + self.write("domid", self.frontend_domid.to_string(), None) + .await?; + self.write("type", "PV", None).await?; + self.write("store/port", created.store_evtchn.to_string(), None) + .await?; + self.write("store/ring-ref", created.store_mfn.to_string(), None) + .await?; + for i in 0..platform.resources.max_vcpus { + let path = format!("{}/cpu/{}", self.frontend_dom_path, i); + self.tx.mkdir(&path).await?; + self.tx.set_perms(&path, ro_perm).await?; + let path = format!("{}/cpu/{}/availability", self.frontend_dom_path, i); + self.tx + .write_string( + &path, + if i < platform.resources.assigned_vcpus { + "online" + } else { + "offline" + }, + ) + .await?; + self.tx.set_perms(&path, ro_perm).await?; + } + Ok(()) + } + + pub async fn add_device(&self, id: u64, device: DeviceDescription) -> Result<()> { + let frontend_path = if let Some(ref special_frontend_path) = device.special_frontend_path { + format!("{}/{}", self.frontend_dom_path, special_frontend_path) + } else { + format!( + "{}/device/{}/{}", + self.frontend_dom_path, device.frontend_type, id + ) + }; + let backend_path = format!( + "{}/backend/{}/{}/{}", + self.backend_dom_path, device.backend_type, self.frontend_domid, id + ); + + let frontend_perms = &[ + XsPermission { + id: self.frontend_domid, + perms: XS_PERM_READ_WRITE, + }, + XsPermission { + id: self.backend_domid, + perms: XS_PERM_READ, + }, + ]; + + let backend_perms = &[ + XsPermission { + id: self.backend_domid, + perms: XS_PERM_READ_WRITE, + }, + XsPermission { + id: self.frontend_domid, + perms: XS_PERM_READ, + }, + ]; + + self.tx.mknod(&frontend_path, frontend_perms).await?; + self.tx.mknod(&backend_path, backend_perms).await?; + + for (key, value) in &device.backend_items { + let path = format!("{}/{}", backend_path, key); + self.tx.write_string(&path, value).await?; + } + + self.tx + .write_string(format!("{}/frontend", backend_path), &frontend_path) + .await?; + self.tx + .write_string( + format!("{}/frontend-id", backend_path), + &self.frontend_domid.to_string(), + ) + .await?; + for (key, value) in &device.frontend_items { + let path = format!("{}/{}", frontend_path, key); + self.tx.write_string(&path, value).await?; + if device.special_frontend_path.is_none() { + self.tx.set_perms(&path, frontend_perms).await?; + } + } + self.tx + .write_string(format!("{}/backend", frontend_path), &backend_path) + .await?; + self.tx + .write_string( + format!("{}/backend-id", frontend_path), + &self.backend_domid.to_string(), + ) + .await?; + Ok(()) + } + + pub async fn add_rw_path(&self, key: impl AsRef) -> Result<()> { + let rw_perm = &[XsPermission { + id: self.frontend_domid, + perms: XS_PERM_READ_WRITE, + }]; + + self.tx + .mknod( + &format!("{}/{}", self.frontend_dom_path, key.as_ref()), + rw_perm, + ) + .await?; + Ok(()) + } + + async fn before_commit(&self) -> Result<()> { + let devid_allocator_state = self.devalloc.lock().await.serialize(); + let blkid_allocator_state = self.blkalloc.lock().await.serialize(); + self.tx + .write( + format!("{}/devid-alloc-state", self.frontend_dom_path), + devid_allocator_state, + ) + .await?; + self.tx + .write( + format!("{}/blkid-alloc-state", self.frontend_dom_path), + blkid_allocator_state, + ) + .await?; + Ok(()) + } + + pub async fn maybe_commit(mut self) -> Result { + self.abort = false; + self.before_commit().await?; + Ok(self.tx.maybe_commit().await?) + } + + pub async fn commit(mut self) -> Result<()> { + self.abort = false; + self.before_commit().await?; + self.tx.commit().await?; + Ok(()) + } +} + +impl Drop for XenTransaction { + fn drop(&mut self) { + if !self.abort { + return; + } + let tx = self.tx.clone(); + tokio::task::spawn(async move { + let _ = tx.abort().await; + }); + } +} + +pub struct DeviceDescription { + frontend_type: String, + backend_type: String, + special_frontend_path: Option, + frontend_items: HashMap, + backend_items: HashMap, +} + +impl DeviceDescription { + pub fn new(frontend_type: impl AsRef, backend_type: impl AsRef) -> Self { + Self { + frontend_type: frontend_type.as_ref().to_string(), + backend_type: backend_type.as_ref().to_string(), + special_frontend_path: None, + frontend_items: HashMap::new(), + backend_items: HashMap::new(), + } + } + + pub fn special_frontend_path(&mut self, path: impl AsRef) -> &mut Self { + self.special_frontend_path = Some(path.as_ref().to_string()); + self + } + + pub fn add_frontend_item(&mut self, key: impl AsRef, value: impl ToString) -> &mut Self { + self.frontend_items + .insert(key.as_ref().to_string(), value.to_string()); + self + } + + pub fn add_backend_item(&mut self, key: impl AsRef, value: impl ToString) -> &mut Self { + self.backend_items + .insert(key.as_ref().to_string(), value.to_string()); + self + } + + pub fn add_frontend_bool(&mut self, key: impl AsRef, value: bool) -> &mut Self { + self.add_frontend_item(key, if value { "1" } else { "0" }) + } + + pub fn add_backend_bool(&mut self, key: impl AsRef, value: bool) -> &mut Self { + self.add_backend_item(key, if value { "1" } else { "0" }) + } + + pub fn done(self) -> Self { + self + } +} + +#[derive(Clone, Debug)] +pub struct DeviceResult { + pub id: u64, +} + +#[derive(Clone, Debug)] +pub struct BlockDeviceResult { + pub id: u64, + pub idx: u32, +} + +#[async_trait::async_trait] +pub trait DeviceConfig { + type Result; + + async fn add_to_transaction(&self, tx: &XenTransaction) -> Result; +} + +#[derive(Clone, Debug)] +pub struct BlockDeviceRef { + pub path: String, + pub major: u32, + pub minor: u32, +} + +impl BlockDeviceRef { + pub fn new(path: impl AsRef, major: u32, minor: u32) -> Self { + Self { + path: path.as_ref().to_string(), + major, + minor, + } + } +} diff --git a/crates/xen/xenclient/src/tx/pci.rs b/crates/xen/xenclient/src/tx/pci.rs new file mode 100644 index 00000000..c3461cfe --- /dev/null +++ b/crates/xen/xenclient/src/tx/pci.rs @@ -0,0 +1,194 @@ +use super::{DeviceConfig, DeviceDescription, DeviceResult, XenTransaction}; +use crate::{ + error::{Error, Result}, + pci::{PciBdf, XenPciBackend}, +}; +use indexmap::IndexMap; +use xencall::{sys::DOMCTL_DEV_RDM_RELAXED, XenCall}; +use xenplatform::sys::XEN_PAGE_SHIFT; + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub enum PciRdmReservePolicy { + Invalid, + #[default] + Strict, + Relaxed, +} + +impl PciRdmReservePolicy { + pub fn to_option_str(&self) -> &str { + match self { + PciRdmReservePolicy::Invalid => "-1", + PciRdmReservePolicy::Strict => "0", + PciRdmReservePolicy::Relaxed => "1", + } + } +} + +pub struct PciDeviceConfig { + bdf: PciBdf, + rdm_reserve_policy: PciRdmReservePolicy, + permissive: bool, + msi_translate: bool, + power_management: bool, +} + +pub struct PciRootDeviceConfig { + backend_type: String, + devices: Vec, +} + +impl PciDeviceConfig { + pub fn new(bdf: PciBdf) -> Self { + Self { + bdf, + rdm_reserve_policy: PciRdmReservePolicy::Strict, + permissive: false, + msi_translate: false, + power_management: false, + } + } + + pub fn rdm_reserve_policy(&mut self, rdm_reserve_policy: PciRdmReservePolicy) -> &mut Self { + self.rdm_reserve_policy = rdm_reserve_policy; + self + } + + pub fn permissive(&mut self, permissive: bool) -> &mut Self { + self.permissive = permissive; + self + } + + pub fn msi_translate(&mut self, msi_translate: bool) -> &mut Self { + self.msi_translate = msi_translate; + self + } + + pub fn power_management(&mut self, power_management: bool) -> &mut Self { + self.power_management = power_management; + self + } + + pub fn done(self) -> Self { + self + } +} + +impl Default for PciRootDeviceConfig { + fn default() -> Self { + Self::new() + } +} + +impl PciRootDeviceConfig { + pub fn new() -> Self { + Self { + backend_type: "pci".to_string(), + devices: Vec::new(), + } + } + + pub fn backend_type(&mut self, backend_type: impl AsRef) -> &mut Self { + self.backend_type = backend_type.as_ref().to_string(); + self + } + + pub fn add_device(&mut self, device: PciDeviceConfig) -> &mut Self { + self.devices.push(device); + self + } + + pub async fn prepare(&self, domid: u32, call: &XenCall) -> Result<()> { + for device in &self.devices { + let backend = XenPciBackend::new(); + if !backend.is_assigned(&device.bdf).await? { + return Err(Error::PciDeviceNotAssignable(device.bdf)); + } + let resources = backend.read_resources(&device.bdf).await?; + for resource in resources { + if resource.is_bar_io() { + call.ioport_permission( + domid, + resource.start as u32, + resource.size() as u32, + true, + ) + .await?; + } else { + call.iomem_permission( + domid, + resource.start >> XEN_PAGE_SHIFT, + (resource.size() + (XEN_PAGE_SHIFT - 1)) >> XEN_PAGE_SHIFT, + true, + ) + .await?; + } + } + + if let Some(irq) = backend.read_irq(&device.bdf).await? { + let irq = call.map_pirq(domid, irq as isize, None).await?; + call.irq_permission(domid, irq, true).await?; + } + + backend.reset(&device.bdf).await?; + + call.assign_device( + domid, + device.bdf.encode(), + if device.rdm_reserve_policy == PciRdmReservePolicy::Relaxed { + DOMCTL_DEV_RDM_RELAXED + } else { + 0 + }, + ) + .await?; + + if device.permissive { + backend.enable_permissive(&device.bdf).await?; + } + } + Ok(()) + } + + pub fn done(self) -> Self { + self + } +} + +#[async_trait::async_trait] +impl DeviceConfig for PciRootDeviceConfig { + type Result = DeviceResult; + + async fn add_to_transaction(&self, tx: &XenTransaction) -> Result { + let id = tx.assign_next_devid().await?; + let mut device = DeviceDescription::new("pci", &self.backend_type); + device + .add_backend_bool("online", true) + .add_backend_item("state", 1) + .add_backend_item("num_devs", self.devices.len()); + + for (index, pci) in self.devices.iter().enumerate() { + let mut options = IndexMap::new(); + options.insert("permissive", if pci.permissive { "1" } else { "0" }); + options.insert("rdm_policy", pci.rdm_reserve_policy.to_option_str()); + options.insert("msitranslate", if pci.msi_translate { "1" } else { "0" }); + let options = options + .into_iter() + .map(|(key, value)| format!("{}={}", key, value)) + .collect::>() + .join(","); + device + .add_backend_item(format!("key-{}", index), pci.bdf.to_string()) + .add_backend_item(format!("dev-{}", index), pci.bdf.to_string()) + .add_backend_item(format!("opts-{}", index), options); + + if let Some(vdefn) = pci.bdf.vdefn { + device.add_backend_item(format!("vdefn-{}", index), format!("{:#x}", vdefn)); + } + } + + device.add_frontend_item("state", 1); + tx.add_device(id, device).await?; + Ok(DeviceResult { id }) + } +} diff --git a/crates/xen/xenclient/src/tx/vbd.rs b/crates/xen/xenclient/src/tx/vbd.rs new file mode 100644 index 00000000..0bfa0ac5 --- /dev/null +++ b/crates/xen/xenclient/src/tx/vbd.rs @@ -0,0 +1,131 @@ +use super::{BlockDeviceRef, BlockDeviceResult, DeviceConfig, DeviceDescription, XenTransaction}; +use crate::{ + error::{Error, Result}, + util::vbd_blkidx_to_disk_name, +}; + +pub struct VbdDeviceConfig { + backend_type: String, + removable: bool, + bootable: bool, + writable: bool, + discard: bool, + trusted: bool, + block_device: Option, +} + +impl Default for VbdDeviceConfig { + fn default() -> Self { + Self::new() + } +} + +impl VbdDeviceConfig { + pub fn new() -> Self { + Self { + backend_type: "vbd".to_string(), + removable: false, + bootable: true, + writable: false, + discard: false, + trusted: true, + block_device: None, + } + } + + pub fn backend_type(&mut self, backend_type: impl AsRef) -> &mut Self { + self.backend_type = backend_type.as_ref().to_string(); + self + } + + pub fn removable(&mut self, removable: bool) -> &mut Self { + self.removable = removable; + self + } + + pub fn bootable(&mut self, bootable: bool) -> &mut Self { + self.bootable = bootable; + self + } + + pub fn writable(&mut self, writable: bool) -> &mut Self { + self.writable = writable; + self + } + + pub fn discard(&mut self, discard: bool) -> &mut Self { + self.discard = discard; + self + } + + pub fn trusted(&mut self, trusted: bool) -> &mut Self { + self.trusted = trusted; + self + } + + pub fn block_device(&mut self, block_device: BlockDeviceRef) -> &mut Self { + self.block_device = Some(block_device); + self + } + + pub fn done(self) -> Self { + self + } +} + +#[async_trait::async_trait] +impl DeviceConfig for VbdDeviceConfig { + type Result = BlockDeviceResult; + + async fn add_to_transaction(&self, tx: &XenTransaction) -> Result { + let id = tx.assign_next_devid().await?; + let idx = tx.assign_next_blkidx().await?; + let vdev = vbd_blkidx_to_disk_name(idx)?; + let block_device = self + .block_device + .as_ref() + .ok_or_else(|| Error::ParameterMissing("block device"))?; + + let mut device = DeviceDescription::new("vbd", &self.backend_type); + device + .add_backend_item("online", 1) + .add_backend_bool("removable", self.removable) + .add_backend_bool("bootable", self.bootable) + .add_backend_item("type", "phy") + .add_backend_item("device-type", "disk") + .add_backend_item("discard-enable", self.discard) + .add_backend_item("specification", "xen") + .add_backend_item("physical-device-path", &block_device.path) + .add_backend_item("mode", if self.writable { "w" } else { "r" }) + .add_backend_item( + "physical-device", + format!("{:02x}:{:02x}", block_device.major, block_device.minor), + ) + .add_backend_item("dev", &vdev) + .add_backend_item("state", 1); + + // we should use standard virtual-device support for first few block devices. + // the kernel warns when you use ext for indexes 5 or less, due to + // potential id overlapping. + let (vdev, vd_key) = if idx <= 5 { + // shift by 4 as partition count is 16 + ((202 << 8) | (idx as u64 * 16u64), "virtual-device") + } else { + // this is silly but 256 is the number of partitions + // multiply the index by that to get the actual id + ((1u64 << 28u64) + (idx as u64) * 256, "virtual-device-ext") + }; + + device + .add_frontend_item(vd_key, vdev) + .add_frontend_item("state", 1) + .add_frontend_item("device-type", "disk") + .add_frontend_bool("trusted", self.trusted) + .add_frontend_item("protocol", "x86_64-abi") + .add_frontend_item("x-index", idx); + + tx.add_device(id, device).await?; + + Ok(BlockDeviceResult { id, idx }) + } +} diff --git a/crates/xen/xenclient/src/tx/vif.rs b/crates/xen/xenclient/src/tx/vif.rs new file mode 100644 index 00000000..0ff093fa --- /dev/null +++ b/crates/xen/xenclient/src/tx/vif.rs @@ -0,0 +1,112 @@ +use super::{DeviceConfig, DeviceDescription, DeviceResult, XenTransaction}; +use crate::error::{Error, Result}; + +pub struct VifDeviceConfig { + backend_type: String, + mac: Option, + mtu: Option, + script: Option, + bridge: Option, + trusted: bool, +} + +impl Default for VifDeviceConfig { + fn default() -> Self { + Self::new() + } +} + +impl VifDeviceConfig { + pub fn new() -> Self { + Self { + backend_type: "vif".to_string(), + mac: None, + mtu: None, + script: None, + bridge: None, + trusted: true, + } + } + + pub fn backend_type(&mut self, backend_type: impl AsRef) -> &mut Self { + self.backend_type = backend_type.as_ref().to_string(); + self + } + + pub fn mac(&mut self, mac: impl AsRef) -> &mut Self { + self.mac = Some(mac.as_ref().to_string()); + self + } + + pub fn mtu(&mut self, mtu: u32) -> &mut Self { + self.mtu = Some(mtu); + self + } + + pub fn script(&mut self, script: impl AsRef) -> &mut Self { + self.script = Some(script.as_ref().to_string()); + self + } + + pub fn bridge(&mut self, bridge: impl AsRef) -> &mut Self { + self.bridge = Some(bridge.as_ref().to_string()); + self + } + + pub fn trusted(&mut self, trusted: bool) -> &mut Self { + self.trusted = trusted; + self + } + + pub fn done(self) -> Self { + self + } +} + +#[async_trait::async_trait] +impl DeviceConfig for VifDeviceConfig { + type Result = DeviceResult; + + async fn add_to_transaction(&self, tx: &XenTransaction) -> Result { + let id = tx.assign_next_devid().await?; + let mac = self + .mac + .as_ref() + .ok_or_else(|| Error::ParameterMissing("mac address"))?; + let mtu = self + .mtu + .ok_or_else(|| Error::ParameterMissing("mtu"))? + .to_string(); + let mut device = DeviceDescription::new("vif", &self.backend_type); + device + .add_backend_item("online", 1) + .add_backend_item("state", 1) + .add_backend_item("mac", mac) + .add_backend_item("mtu", &mtu) + .add_backend_item("type", "vif") + .add_backend_item("handle", id); + + if let Some(bridge) = self.bridge.as_ref() { + device.add_backend_item("bridge", bridge); + } + + if let Some(script) = self.script.as_ref() { + device + .add_backend_item("script", script) + .add_backend_item("hotplug-status", ""); + } else { + device + .add_backend_item("script", "") + .add_backend_item("hotplug-status", "connected"); + } + + device + .add_frontend_item("state", 1) + .add_frontend_item("mac", mac) + .add_frontend_item("mtu", &mtu) + .add_frontend_bool("trusted", self.trusted); + + tx.add_device(id, device.done()).await?; + Ok(DeviceResult { id }) + } +} diff --git a/crates/xen/xenclient/src/util.rs b/crates/xen/xenclient/src/util.rs new file mode 100644 index 00000000..4bb7e644 --- /dev/null +++ b/crates/xen/xenclient/src/util.rs @@ -0,0 +1,21 @@ +use crate::error::{Error, Result}; + +pub fn vbd_blkidx_to_disk_name(blkid: u32) -> Result { + let mut name = "xvd".to_string(); + let mut suffix = String::new(); + let mut n = blkid; + loop { + let c = (n % 26) as u8; + let c = b'a' + c; + let c = char::from_u32(c as u32).ok_or(Error::InvalidBlockIdx)?; + suffix.push(c); + if n >= 26 { + n = (n / 26) - 1; + continue; + } else { + break; + } + } + name.push_str(&suffix.chars().rev().collect::()); + Ok(name) +} diff --git a/crates/xen/xenplatform/src/boot.rs b/crates/xen/xenplatform/src/boot.rs index b7668463..8c9e1865 100644 --- a/crates/xen/xenplatform/src/boot.rs +++ b/crates/xen/xenplatform/src/boot.rs @@ -8,16 +8,9 @@ use crate::{ error::{Error, Result}, mem::PhysicalPages, sys::XEN_PAGE_SHIFT, + ImageLoader, PlatformKernelConfig, PlatformResourcesConfig, }; -pub struct BootSetup { - pub call: XenCall, - pub domid: u32, - pub platform: P, - pub image_loader: I, - pub dtb: Option>, -} - #[derive(Debug, Default, Clone)] pub struct DomainSegment { pub vstart: u64, @@ -42,7 +35,7 @@ pub struct BootDomain { pub phys: PhysicalPages, pub store_evtchn: u32, pub store_mfn: u64, - pub initrd_segment: DomainSegment, + pub initrd_segment: Option, pub console_evtchn: u32, pub console_mfn: u64, pub cmdline: String, @@ -142,111 +135,162 @@ impl BootDomain { } } -impl BootSetup { - pub fn new( - call: XenCall, - domid: u32, - platform: P, - image_loader: I, - dtb: Option>, - ) -> BootSetup { - BootSetup { - call, - domid, - platform, - image_loader, - dtb, - } - } +#[async_trait::async_trait] +pub trait BootSetupPlatform { + fn create_domain(&self, enable_iommu: bool) -> CreateDomain; + fn page_size(&self) -> u64; + fn page_shift(&self) -> u64; + fn needs_early_kernel(&self) -> bool; + fn hvm(&self) -> bool; + + async fn initialize_early(&mut self, domain: &mut BootDomain) -> Result<()>; + + async fn initialize_memory(&mut self, domain: &mut BootDomain) -> Result<()>; + + async fn alloc_page_tables(&mut self, domain: &mut BootDomain) + -> Result>; + + async fn alloc_p2m_segment(&mut self, domain: &mut BootDomain) + -> Result>; + + async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()>; - pub async fn initialize( + async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()>; + + async fn setup_shared_info( &mut self, - initrd: &[u8], - target_mem_mb: u64, - max_mem_mb: u64, - max_vcpus: u32, - cmdline: &str, - ) -> Result { - let target_pages = target_mem_mb << (20 - self.platform.page_shift()); - let total_pages = max_mem_mb << (20 - self.platform.page_shift()); - let image_info = self.image_loader.parse(self.platform.hvm()).await?; - let mut domain = BootDomain { - domid: self.domid, - call: self.call.clone(), - virt_alloc_end: 0, - virt_pgtab_end: 0, - pfn_alloc_end: 0, - total_pages, - target_pages, - page_size: self.platform.page_size(), - image_info, - console_evtchn: 0, - console_mfn: 0, - max_vcpus, - phys: PhysicalPages::new(self.call.clone(), self.domid, self.platform.page_shift()), - initrd_segment: DomainSegment::default(), - store_evtchn: 0, - store_mfn: 0, - cmdline: cmdline.to_string(), - }; + domain: &mut BootDomain, + shared_info_frame: u64, + ) -> Result<()>; - self.platform.initialize_early(&mut domain).await?; + async fn setup_start_info( + &mut self, + domain: &mut BootDomain, + shared_info_frame: u64, + ) -> Result<()>; + + async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()>; - let mut initrd_segment = if !domain.image_info.unmapped_initrd { - Some(domain.alloc_module(initrd).await?) + async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()>; + + async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()>; + + async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()>; + + async fn initialize_internal( + &mut self, + domid: u32, + call: XenCall, + image_loader: &ImageLoader, + domain: &mut BootDomain, + kernel: &PlatformKernelConfig, + ) -> Result<()> { + self.initialize_early(domain).await?; + + let mut initrd_segment = if !domain.image_info.unmapped_initrd && kernel.initrd.is_some() { + Some(domain.alloc_module(kernel.initrd.as_ref().unwrap()).await?) } else { None }; - let mut kernel_segment = if self.platform.needs_early_kernel() { - Some(self.load_kernel_segment(&mut domain).await?) + let mut kernel_segment = if self.needs_early_kernel() { + Some(self.load_kernel_segment(image_loader, domain).await?) } else { None }; - self.platform.initialize_memory(&mut domain).await?; + self.initialize_memory(domain).await?; domain.virt_alloc_end = domain.image_info.virt_base; if kernel_segment.is_none() { - kernel_segment = Some(self.load_kernel_segment(&mut domain).await?); + kernel_segment = Some(self.load_kernel_segment(image_loader, domain).await?); } - if domain.image_info.unmapped_initrd { - initrd_segment = Some(domain.alloc_module(initrd).await?); + if domain.image_info.unmapped_initrd && kernel.initrd.is_some() { + initrd_segment = Some(domain.alloc_module(kernel.initrd.as_ref().unwrap()).await?); } - domain.initrd_segment = - initrd_segment.ok_or(Error::MemorySetupFailed("initrd_segment missing"))?; - - self.platform.alloc_magic_pages(&mut domain).await?; - - domain.store_evtchn = self.call.evtchn_alloc_unbound(self.domid, 0).await?; - + domain.initrd_segment = initrd_segment; + self.alloc_magic_pages(domain).await?; + domain.store_evtchn = call.evtchn_alloc_unbound(domid, 0).await?; let _kernel_segment = kernel_segment.ok_or(Error::MemorySetupFailed("kernel_segment missing"))?; + Ok(()) + } - Ok(domain) + #[allow(clippy::too_many_arguments)] + async fn initialize( + &mut self, + domid: u32, + call: XenCall, + image_loader: &ImageLoader, + kernel: &PlatformKernelConfig, + resources: &PlatformResourcesConfig, + ) -> Result { + let target_pages = resources.assigned_memory_mb << (20 - self.page_shift()); + let total_pages = resources.max_memory_mb << (20 - self.page_shift()); + let image_info = image_loader.parse(self.hvm()).await?; + let mut domain = BootDomain { + domid, + call: call.clone(), + virt_alloc_end: 0, + virt_pgtab_end: 0, + pfn_alloc_end: 0, + total_pages, + target_pages, + page_size: self.page_size(), + image_info, + console_evtchn: 0, + console_mfn: 0, + max_vcpus: resources.max_vcpus, + phys: PhysicalPages::new(call.clone(), domid, self.page_shift()), + initrd_segment: None, + store_evtchn: 0, + store_mfn: 0, + cmdline: kernel.cmdline.clone(), + }; + match self + .initialize_internal(domid, call, image_loader, &mut domain, kernel) + .await + { + Ok(_) => Ok(domain), + Err(error) => { + domain.phys.unmap_all()?; + Err(error) + } + } } - pub async fn boot(&mut self, domain: &mut BootDomain) -> Result<()> { - let domain_info = self.call.get_domain_info(self.domid).await?; + async fn boot_internal( + &mut self, + call: XenCall, + domid: u32, + domain: &mut BootDomain, + ) -> Result<()> { + let domain_info = call.get_domain_info(domid).await?; let shared_info_frame = domain_info.shared_info_frame; - self.platform.setup_page_tables(domain).await?; - self.platform - .setup_start_info(domain, shared_info_frame) - .await?; - self.platform.setup_hypercall_page(domain).await?; - self.platform.bootlate(domain).await?; - self.platform - .setup_shared_info(domain, shared_info_frame) - .await?; - self.platform.vcpu(domain).await?; + self.setup_page_tables(domain).await?; + self.setup_start_info(domain, shared_info_frame).await?; + self.setup_hypercall_page(domain).await?; + self.bootlate(domain).await?; + self.setup_shared_info(domain, shared_info_frame).await?; + self.vcpu(domain).await?; + self.gnttab_seed(domain).await?; domain.phys.unmap_all()?; - self.platform.gnttab_seed(domain).await?; Ok(()) } - async fn load_kernel_segment(&mut self, domain: &mut BootDomain) -> Result { + async fn boot(&mut self, domid: u32, call: XenCall, domain: &mut BootDomain) -> Result<()> { + let result = self.boot_internal(call, domid, domain).await; + domain.phys.unmap_all()?; + result + } + + async fn load_kernel_segment( + &mut self, + image_loader: &ImageLoader, + domain: &mut BootDomain, + ) -> Result { let kernel_segment = domain .alloc_segment( domain.image_info.virt_kstart, @@ -256,56 +300,13 @@ impl BootSetup { let kernel_segment_ptr = kernel_segment.addr as *mut u8; let kernel_segment_slice = unsafe { slice::from_raw_parts_mut(kernel_segment_ptr, kernel_segment.size as usize) }; - self.image_loader + image_loader .load(&domain.image_info, kernel_segment_slice) .await?; Ok(kernel_segment) } } -#[async_trait::async_trait] -pub trait BootSetupPlatform: Clone { - fn create_domain(&self, enable_iommu: bool) -> CreateDomain; - fn page_size(&self) -> u64; - fn page_shift(&self) -> u64; - fn needs_early_kernel(&self) -> bool; - fn hvm(&self) -> bool; - - async fn initialize_early(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn initialize_memory(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn alloc_page_tables(&mut self, domain: &mut BootDomain) - -> Result>; - - async fn alloc_p2m_segment(&mut self, domain: &mut BootDomain) - -> Result>; - - async fn alloc_magic_pages(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn setup_page_tables(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn setup_shared_info( - &mut self, - domain: &mut BootDomain, - shared_info_frame: u64, - ) -> Result<()>; - - async fn setup_start_info( - &mut self, - domain: &mut BootDomain, - shared_info_frame: u64, - ) -> Result<()>; - - async fn bootlate(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn gnttab_seed(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn vcpu(&mut self, domain: &mut BootDomain) -> Result<()>; - - async fn setup_hypercall_page(&mut self, domain: &mut BootDomain) -> Result<()>; -} - #[async_trait::async_trait] pub trait BootImageLoader { async fn parse(&self, hvm: bool) -> Result; diff --git a/crates/xen/xenplatform/src/domain.rs b/crates/xen/xenplatform/src/domain.rs index 48d7a8b1..9937b2f5 100644 --- a/crates/xen/xenplatform/src/domain.rs +++ b/crates/xen/xenplatform/src/domain.rs @@ -1,9 +1,10 @@ use std::sync::Arc; use crate::{ - boot::{BootSetup, BootSetupPlatform}, - elfloader::ElfImageLoader, + boot::BootDomain, elfloader::ElfImageLoader, error::Error, ImageLoader, RuntimePlatform, + RuntimePlatformType, }; +use log::warn; use uuid::Uuid; use xencall::XenCall; @@ -11,42 +12,92 @@ use crate::error::Result; pub const XEN_EXTRA_MEMORY_KB: u64 = 2048; -pub struct BaseDomainManager { +pub struct PlatformDomainManager { call: XenCall, - pub platform: Arc

, } -impl BaseDomainManager

{ - pub async fn new(call: XenCall, platform: P) -> Result> { - Ok(BaseDomainManager { - call, - platform: Arc::new(platform), - }) +impl PlatformDomainManager { + pub async fn new(call: XenCall) -> Result { + Ok(PlatformDomainManager { call }) + } + + fn max_memory_kb(resources: &PlatformResourcesConfig) -> u64 { + (resources.max_memory_mb * 1024) + XEN_EXTRA_MEMORY_KB } - pub async fn create(&self, config: BaseDomainConfig) -> Result { - let mut domain = self.platform.create_domain(config.enable_iommu); + async fn create_base_domain( + &self, + config: &PlatformDomainConfig, + platform: &RuntimePlatform, + ) -> Result { + let mut domain = platform.create_domain(config.options.iommu); domain.handle = config.uuid.into_bytes(); - domain.max_vcpus = config.max_vcpus; + domain.max_vcpus = config.resources.max_vcpus; let domid = self.call.create_domain(domain).await?; - self.call.set_max_vcpus(domid, config.max_vcpus).await?; + Ok(domid) + } + + async fn configure_domain_resources( + &self, + domid: u32, + config: &PlatformDomainConfig, + ) -> Result<()> { + self.call + .set_max_vcpus(domid, config.resources.max_vcpus) + .await?; self.call - .set_max_mem(domid, (config.max_mem_mb * 1024) + XEN_EXTRA_MEMORY_KB) + .set_max_mem( + domid, + PlatformDomainManager::max_memory_kb(&config.resources), + ) .await?; - let loader = ElfImageLoader::load_file_kernel(&config.kernel)?; - let platform = (*self.platform).clone(); - let mut boot = BootSetup::new(self.call.clone(), domid, platform, loader, None); - let mut domain = boot + Ok(()) + } + + async fn create_internal( + &self, + domid: u32, + config: &PlatformDomainConfig, + mut platform: RuntimePlatform, + ) -> Result { + self.configure_domain_resources(domid, config).await?; + let kernel = config.kernel.clone(); + let loader = tokio::task::spawn_blocking(move || match kernel.format { + KernelFormat::ElfCompressed => ElfImageLoader::load(kernel.data), + KernelFormat::ElfUncompressed => Ok(ElfImageLoader::new(kernel.data)), + }) + .await + .map_err(Error::AsyncJoinError)??; + let loader = ImageLoader::Elf(loader); + let mut domain = platform .initialize( - &config.initrd, - config.target_mem_mb, - config.max_mem_mb, - config.max_vcpus, - &config.cmdline, + domid, + self.call.clone(), + &loader, + &config.kernel, + &config.resources, ) .await?; - boot.boot(&mut domain).await?; - Ok(CreatedDomain { + platform.boot(domid, self.call.clone(), &mut domain).await?; + Ok(domain) + } + + pub async fn create(&self, config: PlatformDomainConfig) -> Result { + let platform = config.platform.create(); + let domid = self.create_base_domain(&config, &platform).await?; + let domain = match self.create_internal(domid, &config, platform).await { + Ok(domain) => domain, + Err(error) => { + if let Err(destroy_fail) = self.call.destroy_domain(domid).await { + warn!( + "failed to destroy failed domain {}: {}", + domid, destroy_fail + ); + } + return Err(error); + } + }; + Ok(PlatformDomainInfo { domid, store_evtchn: domain.store_evtchn, store_mfn: domain.store_mfn, @@ -62,21 +113,43 @@ impl BaseDomainManager

{ } #[derive(Clone, Debug)] -pub struct BaseDomainConfig { +pub struct PlatformDomainConfig { pub uuid: Uuid, - pub owner_domid: u32, - pub max_vcpus: u32, - pub target_vcpus: u32, - pub max_mem_mb: u64, - pub target_mem_mb: u64, - pub kernel: Vec, - pub initrd: Vec, + pub platform: RuntimePlatformType, + pub resources: PlatformResourcesConfig, + pub kernel: PlatformKernelConfig, + pub options: PlatformOptions, +} + +#[derive(Clone, Debug)] +pub struct PlatformKernelConfig { + pub data: Arc>, + pub format: KernelFormat, + pub initrd: Option>>, pub cmdline: String, - pub enable_iommu: bool, } #[derive(Clone, Debug)] -pub struct CreatedDomain { +pub struct PlatformResourcesConfig { + pub max_vcpus: u32, + pub assigned_vcpus: u32, + pub max_memory_mb: u64, + pub assigned_memory_mb: u64, +} + +#[derive(Clone, Debug)] +pub struct PlatformOptions { + pub iommu: bool, +} + +#[derive(Clone, Debug)] +pub enum KernelFormat { + ElfUncompressed, + ElfCompressed, +} + +#[derive(Clone, Debug)] +pub struct PlatformDomainInfo { pub domid: u32, pub store_evtchn: u32, pub store_mfn: u64, diff --git a/crates/xen/xenplatform/src/elfloader.rs b/crates/xen/xenplatform/src/elfloader.rs index 354d6cb4..35d81280 100644 --- a/crates/xen/xenplatform/src/elfloader.rs +++ b/crates/xen/xenplatform/src/elfloader.rs @@ -19,6 +19,12 @@ use std::mem::size_of; use std::sync::Arc; use xz2::bufread::XzDecoder; +const ELF_MAGIC: &[u8] = &[ + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +]; +const GZIP_MAGIC: &[u8] = &[0x1f, 0x8b]; +const XZ_MAGIC: &[u8] = &[0xfd, 0x37, 0x7a, 0x58]; + #[derive(Clone)] pub struct ElfImageLoader { data: Arc>, @@ -60,22 +66,40 @@ fn xen_note_value_as_u64(endian: AnyEndian, value: &[u8]) -> Option { } impl ElfImageLoader { - pub fn new(data: Vec) -> ElfImageLoader { - ElfImageLoader { - data: Arc::new(data), - } + pub fn new(data: Arc>) -> ElfImageLoader { + ElfImageLoader { data } } pub fn load_gz(data: &[u8]) -> Result { let buff = BufReader::new(data); let image = ElfImageLoader::read_one_stream(&mut GzDecoder::new(buff))?; - Ok(ElfImageLoader::new(image)) + Ok(ElfImageLoader::new(Arc::new(image))) } pub fn load_xz(data: &[u8]) -> Result { let buff = BufReader::new(data); let image = ElfImageLoader::read_one_stream(&mut XzDecoder::new(buff))?; - Ok(ElfImageLoader::new(image)) + Ok(ElfImageLoader::new(Arc::new(image))) + } + + pub fn load(data: Arc>) -> Result { + if data.len() >= 16 && find_iter(&data[0..15], ELF_MAGIC).next().is_some() { + return Ok(ElfImageLoader::new(data)); + } + + for start in find_iter(&data, GZIP_MAGIC) { + if let Ok(elf) = ElfImageLoader::load_gz(&data[start..]) { + return Ok(elf); + } + } + + for start in find_iter(&data, XZ_MAGIC) { + if let Ok(elf) = ElfImageLoader::load_xz(&data[start..]) { + return Ok(elf); + } + } + + Err(Error::ElfCompressionUnknown) } fn read_one_stream(read: &mut dyn Read) -> Result> { @@ -101,36 +125,11 @@ impl ElfImageLoader { Ok(result) } - pub fn load_file_gz(path: &str) -> Result { - let file = std::fs::read(path)?; - ElfImageLoader::load_gz(file.as_slice()) - } - - pub fn load_file_xz(path: &str) -> Result { - let file = std::fs::read(path)?; - ElfImageLoader::load_xz(file.as_slice()) - } - - pub fn load_file_kernel(data: &[u8]) -> Result { - for start in find_iter(data, &[0x1f, 0x8b]) { - if let Ok(elf) = ElfImageLoader::load_gz(&data[start..]) { - return Ok(elf); - } - } - - for start in find_iter(data, &[0xfd, 0x37, 0x7a, 0x58]) { - if let Ok(elf) = ElfImageLoader::load_xz(&data[start..]) { - return Ok(elf); - } - } - - Err(Error::ElfCompressionUnknown) - } - fn parse_sync(&self, hvm: bool) -> Result { let elf = ElfBytes::::minimal_parse(self.data.as_slice())?; - let headers = elf.section_headers().ok_or(Error::ElfInvalidImage)?; - let mut linux_notes: HashMap> = HashMap::new(); + let headers = elf + .section_headers() + .ok_or(Error::ElfInvalidImage("section headers missing"))?; let mut xen_notes: HashMap = HashMap::new(); for header in headers { @@ -140,62 +139,55 @@ impl ElfImageLoader { let notes = elf.section_data_as_notes(&header)?; for note in notes { - if let Note::Unknown(note) = note { - if note.name == "Linux" { - linux_notes.insert(note.n_type, note.desc.to_vec()); - } - - if note.name == "Xen" { - for typ in XEN_ELFNOTE_TYPES { - if typ.id != note.n_type { - continue; - } + let Note::Unknown(note) = note else { + continue; + }; + + if note.name == "Xen" { + for typ in XEN_ELFNOTE_TYPES { + if typ.id != note.n_type { + continue; + } - let value = if !typ.is_string { - xen_note_value_as_u64(elf.ehdr.endianness, note.desc).unwrap_or(0) - } else { - 0 - }; + let value = if !typ.is_string { + xen_note_value_as_u64(elf.ehdr.endianness, note.desc).unwrap_or(0) + } else { + 0 + }; - xen_notes.insert(typ.id, ElfNoteValue { value }); - } - continue; + xen_notes.insert(typ.id, ElfNoteValue { value }); } } } } - if linux_notes.is_empty() { - return Err(Error::ElfInvalidImage); - } - if xen_notes.is_empty() { return Err(Error::ElfXenSupportMissing); } let paddr_offset = xen_notes .get(&XEN_ELFNOTE_PADDR_OFFSET) - .ok_or(Error::ElfInvalidImage)? + .ok_or(Error::ElfXenNoteMissing("PADDR_OFFSET"))? .value; let virt_base = xen_notes .get(&XEN_ELFNOTE_VIRT_BASE) - .ok_or(Error::ElfInvalidImage)? + .ok_or(Error::ElfXenNoteMissing("VIRT_BASE"))? .value; let entry = xen_notes .get(&XEN_ELFNOTE_ENTRY) - .ok_or(Error::ElfInvalidImage)? + .ok_or(Error::ElfXenNoteMissing("ENTRY"))? .value; let virt_hypercall = xen_notes .get(&XEN_ELFNOTE_HYPERCALL_PAGE) - .ok_or(Error::ElfInvalidImage)? + .ok_or(Error::ElfXenNoteMissing("HYPERCALL_PAGE"))? .value; let init_p2m = xen_notes .get(&XEN_ELFNOTE_INIT_P2M) - .ok_or(Error::ElfInvalidImage)? + .ok_or(Error::ElfXenNoteMissing("INIT_P2M"))? .value; let mod_start_pfn = xen_notes .get(&XEN_ELFNOTE_MOD_START_PFN) - .ok_or(Error::ElfInvalidImage)? + .ok_or(Error::ElfXenNoteMissing("MOD_START_PFN"))? .value; let phys32_entry = xen_notes.get(&XEN_ELFNOTE_PHYS32_ENTRY).map(|x| x.value); @@ -203,7 +195,9 @@ impl ElfImageLoader { let mut start: u64 = u64::MAX; let mut end: u64 = 0; - let segments = elf.segments().ok_or(Error::ElfInvalidImage)?; + let segments = elf + .segments() + .ok_or(Error::ElfInvalidImage("segments missing"))?; for header in segments { if (header.p_type != PT_LOAD) || (header.p_flags & (PF_R | PF_W | PF_X)) == 0 { @@ -221,7 +215,9 @@ impl ElfImageLoader { } if paddr_offset != u64::MAX && virt_base == u64::MAX { - return Err(Error::ElfInvalidImage); + return Err(Error::ElfInvalidImage( + "paddr_offset specified, but virt_base is not specified", + )); } let virt_offset = virt_base - paddr_offset; @@ -247,8 +243,13 @@ impl ElfImageLoader { }; Ok(image_info) } + + pub fn into_elf_bytes(self) -> Arc> { + self.data + } } +#[derive(Debug)] struct ElfNoteValue { value: u64, } @@ -262,7 +263,9 @@ impl BootImageLoader for ElfImageLoader { async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> { let elf = ElfBytes::::minimal_parse(self.data.as_slice())?; - let segments = elf.segments().ok_or(Error::ElfInvalidImage)?; + let segments = elf + .segments() + .ok_or(Error::ElfInvalidImage("segments missing"))?; debug!( "load dst={:#x} segments={}", diff --git a/crates/xen/xenplatform/src/error.rs b/crates/xen/xenplatform/src/error.rs index a90e6977..e33d7e98 100644 --- a/crates/xen/xenplatform/src/error.rs +++ b/crates/xen/xenplatform/src/error.rs @@ -28,10 +28,14 @@ pub enum Error { PopulatePhysmapFailed(usize, usize, usize), #[error("unknown elf compression method")] ElfCompressionUnknown, - #[error("expected elf image format not found")] - ElfInvalidImage, + #[error("elf image format invalid: {0}")] + ElfInvalidImage(&'static str), + #[error("elf linux image not found")] + ElfNotLinux, #[error("provided elf image does not contain xen support")] ElfXenSupportMissing, + #[error("provided elf image does not contain xen note {0}")] + ElfXenNoteMissing(&'static str), #[error("regex error: {0}")] RegexError(#[from] regex::Error), #[error("error: {0}")] diff --git a/crates/xen/xenplatform/src/lib.rs b/crates/xen/xenplatform/src/lib.rs index 6c841fcd..fac9ce33 100644 --- a/crates/xen/xenplatform/src/lib.rs +++ b/crates/xen/xenplatform/src/lib.rs @@ -4,9 +4,104 @@ pub mod error; pub mod mem; pub mod sys; +use boot::{BootDomain, BootImageInfo, BootImageLoader, BootSetupPlatform}; +use domain::{PlatformKernelConfig, PlatformResourcesConfig}; +use elfloader::ElfImageLoader; +use error::Result; +use unsupported::UnsupportedPlatform; +use xencall::{sys::CreateDomain, XenCall}; + use crate::error::Error; pub mod domain; pub mod unsupported; #[cfg(target_arch = "x86_64")] pub mod x86pv; + +#[derive(Clone)] +pub enum ImageLoader { + Elf(ElfImageLoader), +} + +impl ImageLoader { + async fn parse(&self, hvm: bool) -> Result { + match self { + ImageLoader::Elf(elf) => elf.parse(hvm).await, + } + } + + async fn load(&self, image_info: &BootImageInfo, dst: &mut [u8]) -> Result<()> { + match self { + ImageLoader::Elf(elf) => elf.load(image_info, dst).await, + } + } +} + +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord)] +pub enum RuntimePlatformType { + Unsupported, + #[cfg(target_arch = "x86_64")] + Pv, +} + +impl RuntimePlatformType { + pub fn create(&self) -> RuntimePlatform { + match self { + RuntimePlatformType::Unsupported => { + RuntimePlatform::Unsupported(UnsupportedPlatform::new()) + } + #[cfg(target_arch = "x86_64")] + RuntimePlatformType::Pv => RuntimePlatform::Pv(x86pv::X86PvPlatform::new()), + } + } +} + +#[allow(clippy::large_enum_variant)] +pub enum RuntimePlatform { + Unsupported(UnsupportedPlatform), + #[cfg(target_arch = "x86_64")] + Pv(x86pv::X86PvPlatform), +} + +impl RuntimePlatform { + #[allow(clippy::too_many_arguments)] + pub async fn initialize( + &mut self, + domid: u32, + call: XenCall, + image_loader: &ImageLoader, + kernel: &PlatformKernelConfig, + resources: &PlatformResourcesConfig, + ) -> Result { + match self { + RuntimePlatform::Unsupported(unsupported) => { + unsupported + .initialize(domid, call, image_loader, kernel, resources) + .await + } + #[cfg(target_arch = "x86_64")] + RuntimePlatform::Pv(pv) => { + pv.initialize(domid, call, image_loader, kernel, resources) + .await + } + } + } + + pub async fn boot(&mut self, domid: u32, call: XenCall, domain: &mut BootDomain) -> Result<()> { + match self { + RuntimePlatform::Unsupported(unsupported) => { + unsupported.boot(domid, call, domain).await + } + #[cfg(target_arch = "x86_64")] + RuntimePlatform::Pv(pv) => pv.boot(domid, call, domain).await, + } + } + + pub fn create_domain(&self, enable_iommu: bool) -> CreateDomain { + match self { + RuntimePlatform::Unsupported(unsupported) => unsupported.create_domain(enable_iommu), + #[cfg(target_arch = "x86_64")] + RuntimePlatform::Pv(pv) => pv.create_domain(enable_iommu), + } + } +} diff --git a/crates/xen/xenplatform/src/x86pv.rs b/crates/xen/xenplatform/src/x86pv.rs index 51492d88..3a1188c5 100644 --- a/crates/xen/xenplatform/src/x86pv.rs +++ b/crates/xen/xenplatform/src/x86pv.rs @@ -9,8 +9,8 @@ use log::{debug, trace}; use nix::errno::Errno; use slice_copy::copy; use xencall::sys::{ - x8664VcpuGuestContext, CreateDomain, E820Entry, VcpuGuestContextAny, E820_MAX, E820_RAM, - E820_UNUSABLE, MMUEXT_PIN_L4_TABLE, XEN_DOMCTL_CDF_IOMMU, + x8664VcpuGuestContext, CreateDomain, VcpuGuestContextAny, MMUEXT_PIN_L4_TABLE, + XEN_DOMCTL_CDF_IOMMU, }; use crate::{ @@ -282,52 +282,6 @@ impl X86PvPlatform { self.table.mappings[m] = map; Ok(m) } - - fn e820_sanitize( - &self, - mut source: Vec, - map_limit_kb: u64, - ) -> Result> { - let mut e820 = vec![E820Entry::default(); E820_MAX as usize]; - - for entry in &mut source { - if entry.addr > 0x100000 { - continue; - } - - // entries under 1MB should be removed. - entry.typ = 0; - entry.size = 0; - entry.addr = u64::MAX; - } - - let mut lowest = u64::MAX; - let mut highest = 0; - - for entry in &source { - if entry.typ == E820_RAM || entry.typ == E820_UNUSABLE || entry.typ == 0 { - continue; - } - - lowest = if entry.addr < lowest { - entry.addr - } else { - lowest - }; - - highest = if entry.addr + entry.size > highest { - entry.addr + entry.size - } else { - highest - } - } - - e820[0].addr = 0; - e820[0].size = map_limit_kb << 10; - e820[0].typ = E820_RAM; - - Ok(e820) - } } #[async_trait::async_trait] @@ -690,8 +644,10 @@ impl BootSetupPlatform for X86PvPlatform { (*info).store_mfn = domain.phys.p2m[xenstore_segment.pfn as usize]; (*info).console.mfn = domain.console_mfn; (*info).console.evtchn = domain.console_evtchn; - (*info).mod_start = domain.initrd_segment.vstart; - (*info).mod_len = domain.initrd_segment.size; + if let Some(ref initrd_segment) = domain.initrd_segment { + (*info).mod_start = initrd_segment.vstart; + (*info).mod_len = initrd_segment.size; + } for (i, c) in domain.cmdline.chars().enumerate() { (*info).cmdline[i] = c as c_char; } @@ -715,12 +671,6 @@ impl BootSetupPlatform for X86PvPlatform { domain.phys.unmap(pg_pfn)?; domain.phys.unmap(p2m_segment.pfn)?; - let map = domain.call.get_memory_map(E820_MAX).await?; - let mem_mb = domain.total_pages >> (20 - self.page_shift()); - let mem_kb = mem_mb * 1024; - let e820 = self.e820_sanitize(map, mem_kb)?; - domain.call.set_memory_map(domain.domid, e820).await?; - domain .call .mmuext(domain.domid, MMUEXT_PIN_L4_TABLE, pg_mfn, 0)