diff --git a/Cargo.lock b/Cargo.lock index 53d65c443..b9dd9afab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3198,6 +3198,7 @@ dependencies = [ "atty", "bhyve_api", "clap 4.3.0", + "crucible-client-types", "ctrlc", "erased-serde", "futures", @@ -3214,6 +3215,7 @@ dependencies = [ "slog-term", "tokio", "toml 0.5.11", + "uuid", ] [[package]] diff --git a/README.md b/README.md index 7d315d1e8..82e1fb885 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Propolis -Propolis is a rust-based userspace for illumos bhyve. +Propolis VMM userspace for use with illumos bhyve. ## Prerequisites @@ -12,189 +12,35 @@ Propolis works best (and its CI tests run) on AMD hosts, but it can also be used to run VMs on Intel hosts. Live migration is primarily supported on AMD hosts but may work on Intel hosts as well. -## Building - -To build, run: - -```bash -$ cargo build -``` - -## propolis crate - -The main propolis crate is structured as a library providing the building -blocks to create bhyve backed VM instances. It also provides a number of -emulated devices that can be exposed to guests (e.g. serial port, virtio -devices, NVMe). - -## propolis-server - -Propolis is mostly intended to be used via a REST API to drive all of its -functionality. The standard `cargo build` will produce a `propolis-server` -binary you can run: - -### Running - -``` -# propolis-server run -``` - -Note that the server must run as root. One way to ensure propolis-server has -sufficient privileges is by using `pfexec(1)`, as such: - -``` -# pfexec propolis-server run -``` - -### Example Server Configuration - -**Note**: the goal is to move the device config from the toml -to instead be configured via REST API calls. 
- -```toml -bootrom = "/path/to/bootrom/OVMF_CODE.fd" - -[block_dev.alpine_iso] -type = "file" -path = "/path/to/alpine-extended-3.12.0-x86_64.iso" - -[dev.block0] -driver = "pci-virtio-block" -block_dev = "alpine_iso" -pci-path = "0.4.0" - -[dev.net0] -driver = "pci-virtio-viona" -vnic = "vnic_name" -pci-path = "0.5.0" -``` - -## propolis-cli - -Once you've got `propolis-server` running you can interact with it via the REST -API with any of the usual suspects (e.g. cURL, wget). Alternatively, there's a -`propolis-cli` binary to make things a bit easier: - -### Running - -The following CLI commands will create a VM, start the VM, and then attach to -its serial console: - -``` -# propolis-cli -s -p new -# propolis-cli -s -p state run -# propolis-cli -s -p serial -``` - -## propolis-standalone - -Server frontend aside, we also provide a standalone binary for quick -prototyping, `propolis-standalone`. It uses a static toml configuration: - -## Running - -``` -# propolis-standalone -``` - -Example configuration: -```toml -[main] -name = "testvm" -cpus = 4 -bootrom = "/path/to/bootrom/OVMF_CODE.fd" -memory = 1024 - -[block_dev.alpine_iso] -type = "file" -path = "/path/to/alpine-extended-3.12.0-x86_64.iso" - -[dev.block0] -driver = "pci-virtio-block" -block_dev = "alpine_iso" -pci-path = "0.4.0" - -[dev.net0] -driver = "pci-virtio-viona" -vnic = "vnic_name" -pci-path = "0.5.0" -``` - -Propolis will not destroy the VM instance on exit. If one exists with the -specified name on start-up, it will be destroyed and created fresh. - -Propolis will create a unix domain socket, available at "./ttya", -which acts as a serial port. One such tool for accessing this serial port is -[sercons](https://github.com/jclulow/vmware-sercons), though others (such as -"screen") would also work. - -### Quickstart to Alpine - -In the aforementioned config files, there are three major components -that need to be supplied: The guest firmware (bootrom) image, the ISO, and the -VNIC. 
- -Since this is a configuration file, you can supply whatever you'd like, but here -are some options to get up-and-running quickly: - -#### Guest bootrom - -The current recommended and tested guest bootrom is available -[here](https://buildomat.eng.oxide.computer/public/file/oxidecomputer/edk2/image_debug/6d92acf0a22718dd4175d7c64dbcf7aaec3740bd/OVMF_CODE.fd). - -Other UEFI firmware images built from the [Open Virtual Machine Firmware -project](https://github.com/tianocore/tianocore.github.io/wiki/OVMF) may also -work, but these aren't regularly tested and your mileage may vary. - -#### ISO - -Although there are many options for ISOs, an easy option that -should work is the [Alpine Linux distribution](https://alpinelinux.org/downloads/). - -These distributions are lightweight, and they have variants -custom-built for virtual machines. - -A straightforward option to start with is the "virtual" `x86_64` image. - -The "extended" variant contains more useful tools, but will require a -modification of the kernel arguments when booting to see the console on the -serial port. From Grub, this can be accomplished by pressing "e" (to edit), -adding "console=ttyS0" to the line starting with "/boot/vmlinuz-lts", and -pressing "Control + x" to boot with these parameters. - -#### VNIC - -To see your current network interfaces, you can use the following: - -```bash -$ dladm show-link -``` - -To create a vnic, you can use one of your physical devices -(like "e1000g0", if you have an ethernet connection) as a link -for a VNIC. This can be done as follows: - -```bash -NIC_NAME="vnic_prop0" -NIC_MAC="02:08:20:ac:e9:16" -NIC_LINK="e1000g0" - -if ! dladm show-vnic $NIC_NAME 2> /dev/null; then - dladm create-vnic -t -l $NIC_LINK -m $NIC_MAC $NIC_NAME -fi -``` - -#### Running a VM - -After you've got the bootrom, an ISO, a VNIC, and a configuration file that -points to them, you're ready to create and run your VM. 
To do so, make sure -you've done the following: -- [build propolis](#Building) -- run the [propolis-server](#propolis-server) -- create your VM, run it, and hop on the serial console using [propolis-cli](#propolis-cli) -- login to the VM as root (no password) -- optionally, run `setup-alpine` to configure the VM (including setting a root - password) +## Components + +Programs: +- [propolis-server](bin/propolis-server): Run a Propolis VM instance, operated + via REST API calls (typically by + [omicron](https://github.com/oxidecomputer/omicron)) +- [propolis-cli](bin/propolis-cli): CLI wrapper interface for `propolis-server` + API calls +- [propolis-standalone](bin/propolis-standalone): Simple standalone program to + run a Propolis VM instance, operated via a local config file + +Libraries: +- [propolis-client](lib/propolis-client): Rust crate for `propolis-server` API +- [propolis](lib/propolis): Represents the bulk of the emulation logic required + to implement a userspace VMM. Both `propolis-server` and + `propolis-standalone` are built around this. + +## Internal Crates + +These are not meant as committed public interfaces, but rather internal +implementation details, consumed by Propolis components. 
+ +- bhyve-api: API (ioctls & structs) for the illumos bhyve kernel VMM +- dladm: Some thin wrappers around `dladm` queries +- propolis-server-config: Type definitions for `propolis-server` config file +- propolis-standalone-config: Type definitions for `propolis-standalone` config file +- propolis-types: Publicly exposed (via `propolis-server`) types, integral + to the `propolis` library +- viona-api: API (ioctls & structs) for the illumos viona driver ## License diff --git a/bin/propolis-server/README.md b/bin/propolis-server/README.md new file mode 100644 index 000000000..49cf967ad --- /dev/null +++ b/bin/propolis-server/README.md @@ -0,0 +1,66 @@ +# Propolis Server + +## Running + +Propolis is mostly intended to be used via a REST API to drive all of its +functionality. The standard `cargo build` will produce a `propolis-server` +binary you can run: + +``` +# propolis-server run +``` + +Note that the server must run as root. One way to ensure propolis-server has +sufficient privileges is by using `pfexec(1)`, as such: + +``` +# pfexec propolis-server run +``` + +## Example Configuration + +**Note**: the goal is to move the device config from the toml to instead be +configured via REST API calls. + +```toml +bootrom = "/path/to/bootrom/OVMF_CODE.fd" + +[block_dev.alpine_iso] +type = "file" +path = "/path/to/alpine-extended-3.12.0-x86_64.iso" + +[dev.block0] +driver = "pci-virtio-block" +block_dev = "alpine_iso" +pci-path = "0.4.0" + +[dev.net0] +driver = "pci-virtio-viona" +vnic = "vnic_name" +pci-path = "0.5.0" +``` + +## Prerequisites + +When running the server by hand, the appropriate bootrom is required to start +guests properly. See the [standalone +documentation](../propolis-standalone#guest-bootrom) for more details. Details +for [creating necessary vnics](../propolis-standalone#vnic) can be found there +as well, if exposing network devices to the guest.
+ +## CLI Interaction + +Once you've got `propolis-server` running you can interact with it via the REST +API with any of the usual suspects (e.g. cURL, wget). Alternatively, there's a +`propolis-cli` binary to make things a bit easier: + +### Running + +The following CLI commands will create a VM, start the VM, and then attach to +its serial console: + +``` +# propolis-cli -s -p new +# propolis-cli -s -p state run +# propolis-cli -s -p serial +``` diff --git a/bin/propolis-standalone/Cargo.toml b/bin/propolis-standalone/Cargo.toml index 3493bd344..80c4d3e4d 100644 --- a/bin/propolis-standalone/Cargo.toml +++ b/bin/propolis-standalone/Cargo.toml @@ -22,6 +22,7 @@ toml.workspace = true tokio = { workspace = true, features = ["io-util", "rt-multi-thread"] } serde = { workspace = true, features = ["derive"] } propolis.workspace = true +crucible-client-types = { workspace = true, optional = true } propolis-standalone-config = { workspace = true } erased-serde.workspace = true serde_json.workspace = true @@ -31,7 +32,8 @@ slog-dtrace.workspace = true slog-bunyan.workspace = true slog-term.workspace = true num_enum.workspace = true +uuid.workspace = true [features] default = [] -crucible = ["propolis/crucible-full", "propolis/oximeter"] +crucible = ["propolis/crucible-full", "propolis/oximeter", "crucible-client-types"] diff --git a/bin/propolis-standalone/README.md b/bin/propolis-standalone/README.md new file mode 100644 index 000000000..ee16dfbf5 --- /dev/null +++ b/bin/propolis-standalone/README.md @@ -0,0 +1,192 @@ +# Propolis Standalone + +Server frontend aside, we also provide a standalone binary for quick +prototyping, `propolis-standalone`. 
It uses a static toml configuration: + +## Running + +``` +# propolis-standalone +``` + +Example configuration: +```toml +[main] +name = "testvm" +cpus = 4 +bootrom = "/path/to/bootrom/OVMF_CODE.fd" +memory = 1024 + +[block_dev.alpine_iso] +type = "file" +path = "/path/to/alpine-extended-3.12.0-x86_64.iso" + +[dev.block0] +driver = "pci-virtio-block" +block_dev = "alpine_iso" +pci-path = "0.4.0" + +[dev.net0] +driver = "pci-virtio-viona" +vnic = "vnic_name" +pci-path = "0.5.0" +``` + +Propolis will not destroy the VM instance on exit. If one exists with the +specified name on start-up, it will be destroyed and created fresh. + +Propolis will create a unix domain socket, available at "./ttya", +which acts as a serial port. One such tool for accessing this serial port is +[sercons](https://github.com/jclulow/vmware-sercons), though others (such as +`screen`) would also work. + +## Quickstart to Alpine + +In the aforementioned config files, there are three major components +that need to be supplied: The guest firmware (bootrom) image, the ISO, and the +VNIC. + +Since this is a configuration file, you can supply whatever you'd like, but here +are some options to get up-and-running quickly: + +### Guest bootrom + +The current recommended and tested guest bootrom is available +[here](https://buildomat.eng.oxide.computer/public/file/oxidecomputer/edk2/image_debug/6d92acf0a22718dd4175d7c64dbcf7aaec3740bd/OVMF_CODE.fd). + +Other UEFI firmware images built from the [Open Virtual Machine Firmware +project](https://github.com/tianocore/tianocore.github.io/wiki/OVMF) may also +work, but these aren't regularly tested and your mileage may vary. + +### ISO + +Although there are many options for ISOs, an easy option that +should work is the [Alpine Linux distribution](https://alpinelinux.org/downloads/). + +These distributions are lightweight, and they have variants +custom-built for virtual machines. + +A straightforward option to start with is the "virtual" `x86_64` image. 
+ +The "extended" variant contains more useful tools, but will require a +modification of the kernel arguments when booting to see the console on the +serial port. From Grub, this can be accomplished by pressing "e" (to edit), +adding "console=ttyS0" to the line starting with "/boot/vmlinuz-lts", and +pressing "Control + x" to boot with these parameters. + +### VNIC + +To see your current network interfaces, you can use the following: + +```bash +$ dladm show-link +``` + +To create a vnic, you can use one of your physical devices +(like "e1000g0", if you have an ethernet connection) as a link +for a VNIC. This can be done as follows: + +```bash +NIC_NAME="vnic_prop0" +NIC_MAC="02:08:20:ac:e9:16" +NIC_LINK="e1000g0" + +if ! dladm show-vnic $NIC_NAME 2> /dev/null; then + dladm create-vnic -t -l $NIC_LINK -m $NIC_MAC $NIC_NAME +fi +``` + +### Running a VM + +After you've got the bootrom, an ISO, a VNIC, and a configuration file that +points to them, you're ready to create and run your VM. To do so, make sure +you've done the following: +- build propolis (run `cargo build`) +- run the [propolis-server](../propolis-server) +- create your VM, run it, and hop on the serial console using [propolis-cli](../propolis-server#cli-interaction) +- log in to the VM as root (no password) +- optionally, run `setup-alpine` to configure the VM (including setting a root + password) + +## Using Crucible storage + +`propolis-standalone` supports defining crucible-backed storage devices in the +TOML config. It is somewhat inconvenient to do this without scripting, because +`generation` must monotonically increase with each successive connection to the +Downstairs datastore. So if you use this, you need to somehow monotonically bump +up that number in the TOML file before re-launching the VM, unless you're also +creating a new Downstairs region from scratch. + +All the crucible configuration options are crucible-specific, so future changes +to crucible may result in changes to the config options here as well.
Consult +the [oxidecomputer/crucible](https://github.com/oxidecomputer/crucible) codebase +if you need low level details on what certain options actually do. + +Here's an example config. Read the comments for parameter-specific details: + +```toml +[block_dev.some_datastore] +type = "crucible" + +# === REQUIRED OPTIONS === +# these MUST match the region configuration downstairs +block_size = 512 +blocks_per_extent = 262144 +extent_count = 32 + +# Array of the SocketAddrs of the Downstairs instances. There must be three +# of these, or propolis-standalone will panic. +targets = [ + "127.0.0.1:3810", + "127.0.0.1:3820", + "127.0.0.1:3830", +] + +# Generation number used when connecting to Downstairs. This must +# monotonically increase with each successive connection to the Downstairs, +# which means that you need to bump this number every time you restart +# your VM. Kind of annoying, maybe we can get a better way to pass it in. +# Anyway, if you don't want to read-modify-write this value, a hack you +# could do is set this to the current number of seconds since the epoch. +# This'll always work, except for if the system time goes backwards, which +# it can definitely do! So, you know. Be careful. +generation = 1 +# === END REQUIRED OPTIONS === + + +# === OPTIONAL OPTIONS === +# This should be a UUID. It can be anything, really. When unset, defaults +# to a random UUIDv4 +# upstairs_id = "e4396bd0-ede1-48d7-ac14-3d2094dfba5b" + +# When true, some random amount of IO requests will synthetically "fail". +# This is useful when testing IO behavior under Bad Conditions. +# Defaults to false. +# lossy = false + +# the Upstairs (propolis-side) component of crucible currently regularly +# dispatches flushes to act as IO barriers. By default this happens once every 5 +# seconds, but you can adjust it with this option. +# flush_timeout = + +# Base64'd encryption key used to encrypt data at rest. Keys are 256 bits. 
+# Note that the region must have already been created with encryption +# enabled for this to work. That may change later though. +# encryption_key = "" + +# These three values are pem files for TLS encryption of data between +# propolis and the downstairs. +# cert_pem = "" +# key_pem = "" +# root_cert_pem = "" + +# Specifies the SocketAddr of the Upstairs crucible control interface. When +# omitted, the control interface won't be started. The control interface is an +# HTTP server that exposes commands to take snapshots, simulate faults, and +# retrieve runtime debug information. +# control_addr = "" + +# When true, the device will be read-only. Defaults to false +# read_only = false +# === END OPTIONAL OPTIONS === +``` diff --git a/bin/propolis-standalone/src/config.rs b/bin/propolis-standalone/src/config.rs index bc0437139..e7fd29e0d 100644 --- a/bin/propolis-standalone/src/config.rs +++ b/bin/propolis-standalone/src/config.rs @@ -27,13 +27,11 @@ pub fn block_backend( "file" => { let path = be.options.get("path").unwrap().as_str().unwrap(); - let readonly: bool = || -> Option { - match be.options.get("readonly") { - Some(toml::Value::Boolean(read_only)) => Some(*read_only), - Some(toml::Value::String(v)) => v.parse().ok(), - _ => None, - } - }() + let readonly = (match be.options.get("readonly") { + Some(toml::Value::Boolean(read_only)) => Some(*read_only), + Some(toml::Value::String(v)) => v.parse().ok(), + _ => None, + }) .unwrap_or(false); let be = block::FileBackend::create( @@ -47,6 +45,10 @@ pub fn block_backend( let creg = ChildRegister::new(&be, Some(path.to_string())); (be, creg) } + #[cfg(feature = "crucible")] + "crucible" => create_crucible_backend(log, be), + #[cfg(not(feature = "crucible"))] + "crucible" => panic!("crucible device specified in VM config, but propolis-standalone was not built with crucible support.
rebuild propolis-standalone with the `crucible` feature enabled, or remove all crucible devices from your VM config"), _ => { panic!("unrecognized block dev type {}!", be.bdtype); } @@ -75,3 +77,122 @@ pub fn parse_bdf(v: &str) -> Option { None } } + +#[cfg(feature = "crucible")] +fn create_crucible_backend( + log: &slog::Logger, + be: &propolis_standalone_config::BlockDevice, +) -> (Arc, ChildRegister) { + use slog::info; + use uuid::Uuid; + info!( + log, + "Building a crucible VolumeConstructionRequest from options {:?}", + be.options + ); + + // No defaults on here because we really shouldn't try and guess + // what block size the downstairs is using. A lot of things + // default to 512, but it's best not to assume it'll always be + // that way. NOTE(review): a missing or mistyped option panics via `unwrap()`, and the `as` casts on the parsed integers are unchecked (negative or oversized values are not rejected). + let block_size = + be.options.get("block_size").unwrap().as_integer().unwrap() as u64; + + let blocks_per_extent = + be.options.get("blocks_per_extent").unwrap().as_integer().unwrap() + as u64; + + let extent_count = + be.options.get("extent_count").unwrap().as_integer().unwrap() as u32; + + // Parse a UUID, or generate a random one if none is specified. + // Reasonable in something primarily used for testing like + // propolis-standalone, but you wouldn't want to do this in + // prod. + let uuid = be + .options + .get("upstairs_id") + .map(|x| Uuid::parse_str(x.as_str().unwrap()).unwrap()) + .unwrap_or_else(Uuid::new_v4); + + // The actual addresses of the three downstairs we're going to connect to. + let targets: Vec<_> = be + .options + .get("targets") + .unwrap() + .as_array() + .unwrap() + .iter() + .map(|target_val| target_val.as_str().unwrap().parse().unwrap()) + .collect(); + // There is currently no universe where an amount of Downstairs + // other than 3 is valid.
+ assert_eq!(targets.len(), 3); + + let lossy = + be.options.get("lossy").map(|x| x.as_bool().unwrap()).unwrap_or(false); + + let flush_timeout = + be.options.get("flush_timeout").map(|x| x.as_integer().unwrap() as u32); + + let key = be + .options + .get("encryption_key") + .map(|x| x.as_str().unwrap().to_string()); + + let cert_pem = + be.options.get("cert_pem").map(|x| x.as_str().unwrap().to_string()); + + let key_pem = + be.options.get("key_pem").map(|x| x.as_str().unwrap().to_string()); + + let root_cert_pem = be + .options + .get("root_cert_pem") + .map(|x| x.as_str().unwrap().to_string()); + + let control_addr = be + .options + .get("control_addr") + .map(|target_val| target_val.as_str().unwrap().parse().unwrap()); + + let read_only = be + .options + .get("read_only") + .map(|x| x.as_bool().unwrap()) + .unwrap_or(false); + + // This needs to increase monotonically with each successive + // connection to the downstairs. As a hack, you can set it to + // the current system time, and this will usually give us a newer + // generation than the last connection. NEVER do this in prod + // EVER. + let generation = + be.options.get("generation").unwrap().as_integer().unwrap() as u64; + + let req = crucible_client_types::VolumeConstructionRequest::Region { + block_size, + blocks_per_extent, + extent_count, + opts: crucible_client_types::CrucibleOpts { + id: uuid, + target: targets, + lossy, + flush_timeout, + key, + cert_pem, + key_pem, + root_cert_pem, + control: control_addr, + read_only, + }, + gen: generation, + }; + info!(log, "Creating Crucible disk from request {:?}", req); + // QUESTION: is producer_registry: None correct here? NOTE(review): `req` is not referenced after this call, so the `.clone()` looks unnecessary -- confirm and pass `req` by value. + let be = + block::CrucibleBackend::create(req.clone(), read_only, None).unwrap(); + let creg = + ChildRegister::new(&be, Some(be.get_uuid().unwrap().to_string())); + (be, creg) +}