From de3d68ff6b867c131fab492d600f824c661b03d7 Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Wed, 23 Oct 2024 13:38:05 +0000
Subject: [PATCH 01/15] mm: don't set DIRTY without WRITABLE

When CET is enabled, pages marked as DIRTY but not WRITABLE are treated
as special pages used for storing shadow stacks. We must not use this
combination of flags for pages not meant to be used for shadow stacks.

Signed-off-by: Tom Dohrmann
---
 kernel/src/mm/pagetable.rs          | 8 ++++----
 kernel/src/mm/vm/mapping/vmalloc.rs | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/src/mm/pagetable.rs b/kernel/src/mm/pagetable.rs
index 6f3f13f89..a150e2f1a 100644
--- a/kernel/src/mm/pagetable.rs
+++ b/kernel/src/mm/pagetable.rs
@@ -146,7 +146,7 @@ bitflags! {
 
 impl PTEntryFlags {
     pub fn exec() -> Self {
-        Self::PRESENT | Self::GLOBAL | Self::ACCESSED | Self::DIRTY
+        Self::PRESENT | Self::GLOBAL | Self::ACCESSED
     }
 
     pub fn data() -> Self {
@@ -154,11 +154,11 @@ impl PTEntryFlags {
     }
 
     pub fn data_ro() -> Self {
-        Self::PRESENT | Self::GLOBAL | Self::NX | Self::ACCESSED | Self::DIRTY
+        Self::PRESENT | Self::GLOBAL | Self::NX | Self::ACCESSED
     }
 
     pub fn task_exec() -> Self {
-        Self::PRESENT | Self::ACCESSED | Self::DIRTY
+        Self::PRESENT | Self::ACCESSED
     }
 
     pub fn task_data() -> Self {
@@ -166,7 +166,7 @@ impl PTEntryFlags {
     }
 
     pub fn task_data_ro() -> Self {
-        Self::PRESENT | Self::NX | Self::ACCESSED | Self::DIRTY
+        Self::PRESENT | Self::NX | Self::ACCESSED
     }
 }
 
diff --git a/kernel/src/mm/vm/mapping/vmalloc.rs b/kernel/src/mm/vm/mapping/vmalloc.rs
index d4b03316c..246930968 100644
--- a/kernel/src/mm/vm/mapping/vmalloc.rs
+++ b/kernel/src/mm/vm/mapping/vmalloc.rs
@@ -36,11 +36,11 @@ impl VMalloc {
     pub fn new(size: usize, flags: VMFileMappingFlags) -> Result<Self, SvsmError> {
         let mut vmalloc = VMalloc {
             alloc: RawAllocMapping::new(size),
-            flags: PTEntryFlags::ACCESSED | PTEntryFlags::DIRTY,
+            flags: PTEntryFlags::ACCESSED,
         };
 
         if flags.contains(VMFileMappingFlags::Write) {
-            vmalloc.flags |= PTEntryFlags::WRITABLE;
+            vmalloc.flags |= PTEntryFlags::WRITABLE | PTEntryFlags::DIRTY;
         }
 
         if !flags.contains(VMFileMappingFlags::Execute) {

From 1e814714d9dd27a7e74513b862e3afb3e34038ca Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Thu, 12 Sep 2024 14:27:08 +0000
Subject: [PATCH 02/15] mm: implement VMKernelShadowStack

The initialization and pt_flags are a bit special for shadow stack
pages, so this warrants a new `VirtualMapping` implementation.
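
All of the initialization variants added here build on the same
restore-token encoding: a token stores the address of the slot just
above itself (the value SSP will take after `rstorssp`), with bit 0 set
to mark 64-bit mode. A standalone sketch of that encoding (mirroring the
MODE_64BIT constant introduced in cpu/shadow_stack.rs; not part of the
patch itself):

    const MODE_64BIT: usize = 1;

    /// Value stored at `token_addr`; `rstorssp` verifies this token and
    /// then sets SSP = token_addr + 8.
    fn restore_token(token_addr: usize) -> usize {
        (token_addr + 8) + MODE_64BIT
    }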
Signed-off-by: Tom Dohrmann --- kernel/Cargo.toml | 1 + kernel/src/cpu/mod.rs | 1 + kernel/src/cpu/percpu.rs | 16 ++- kernel/src/cpu/shadow_stack.rs | 3 + kernel/src/mm/vm/mapping/mod.rs | 2 + kernel/src/mm/vm/mapping/shadow_stack.rs | 118 +++++++++++++++++++++++ kernel/src/mm/vm/mod.rs | 5 +- 7 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 kernel/src/cpu/shadow_stack.rs create mode 100644 kernel/src/mm/vm/mapping/shadow_stack.rs diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index ca650067e..1a8df7b6d 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -46,6 +46,7 @@ enable-gdb = ["dep:gdbstub", "dep:gdbstub_arch"] mstpm = ["dep:libmstpm"] nosmep = [] nosmap = [] +shadow-stacks = [] [dev-dependencies] diff --git a/kernel/src/cpu/mod.rs b/kernel/src/cpu/mod.rs index 4cb4882dc..68f470607 100644 --- a/kernel/src/cpu/mod.rs +++ b/kernel/src/cpu/mod.rs @@ -17,6 +17,7 @@ pub mod mem; pub mod msr; pub mod percpu; pub mod registers; +pub mod shadow_stack; pub mod smp; pub mod sse; pub mod tlb; diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index 35510f58f..df560ab35 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -17,7 +17,10 @@ use crate::error::{ApicError, SvsmError}; use crate::locking::{LockGuard, RWLock, RWLockIrqSafe, SpinLock}; use crate::mm::pagetable::{PTEntryFlags, PageTable}; use crate::mm::virtualrange::VirtualRange; -use crate::mm::vm::{Mapping, VMKernelStack, VMPhysMem, VMRMapping, VMReserved, VMR}; +use crate::mm::vm::{ + Mapping, ShadowStackInit, VMKernelShadowStack, VMKernelStack, VMPhysMem, VMRMapping, + VMReserved, VMR, +}; use crate::mm::{ virt_to_phys, PageBox, SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, SVSM_PERCPU_END, SVSM_PERCPU_TEMP_BASE_2M, SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, @@ -454,6 +457,17 @@ impl PerCpu { Ok(top_of_stack) } + fn allocate_shadow_stack( + &self, + base: VirtAddr, + init: ShadowStackInit, + ) -> Result { + let (shadow_stack, ssp) = VMKernelShadowStack::new(base, init)?; + self.vm_range + .insert_at(base, Arc::new(Mapping::new(shadow_stack)))?; + Ok(ssp) + } + fn allocate_init_stack(&self) -> Result<(), SvsmError> { let init_stack = Some(self.allocate_stack(SVSM_STACKS_INIT_TASK)?); self.init_stack.set(init_stack); diff --git a/kernel/src/cpu/shadow_stack.rs b/kernel/src/cpu/shadow_stack.rs new file mode 100644 index 000000000..9127342ba --- /dev/null +++ b/kernel/src/cpu/shadow_stack.rs @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: MIT OR Apache-2.0 + +pub const MODE_64BIT: usize = 1; diff --git a/kernel/src/mm/vm/mapping/mod.rs b/kernel/src/mm/vm/mapping/mod.rs index 982f3e01e..3cea29add 100644 --- a/kernel/src/mm/vm/mapping/mod.rs +++ b/kernel/src/mm/vm/mapping/mod.rs @@ -10,6 +10,7 @@ pub mod kernel_stack; pub mod phys_mem; pub mod rawalloc; pub mod reserved; +pub mod shadow_stack; pub mod vmalloc; pub use api::{Mapping, VMMAdapter, VMPageFaultResolution, VirtualMapping, VMM}; @@ -18,4 +19,5 @@ pub use kernel_stack::VMKernelStack; pub use phys_mem::VMPhysMem; pub use rawalloc::RawAllocMapping; pub use reserved::VMReserved; +pub use shadow_stack::{ShadowStackInit, VMKernelShadowStack}; pub use vmalloc::VMalloc; diff --git a/kernel/src/mm/vm/mapping/shadow_stack.rs b/kernel/src/mm/vm/mapping/shadow_stack.rs new file mode 100644 index 000000000..63ea84e7c --- /dev/null +++ b/kernel/src/mm/vm/mapping/shadow_stack.rs @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: MIT OR Apache-2.0 + +use crate::{ + address::{Address, PhysAddr, VirtAddr}, + cpu::shadow_stack, + 
error::SvsmError,
+    mm::{pagetable::PTEntryFlags, vm::VirtualMapping, PageRef, PAGE_SIZE},
+};
+
+#[derive(Debug)]
+pub enum ShadowStackInit {
+    /// The initial shadow stack used by a CPU.
+    ///
+    /// This won't place any tokens on the shadow stack.
+    Init,
+    /// A shadow stack to be used during normal execution of a task.
+    ///
+    /// This will create a shadow stack with a shadow stack restore token.
+    Normal {
+        /// The address of the first instruction that will be executed by the task.
+        entry_return: usize,
+        /// The address of the function that's executed when the task exits.
+        exit_return: Option<usize>,
+    },
+    /// A shadow stack to be used during context switches.
+    ///
+    /// This will create a shadow stack with a shadow stack restore token.
+    ContextSwitch,
+    /// A shadow stack to be used for exception handling (either in PL0_SSP or
+    /// in the ISST).
+    ///
+    /// This will create a shadow stack with a supervisor shadow stack token.
+    Exception,
+}
+
+/// Mapping to be used as a kernel shadow stack. This maps a single page
+/// holding the initial shadow stack tokens.
+#[derive(Debug)]
+pub struct VMKernelShadowStack {
+    page: PageRef,
+}
+
+impl VMKernelShadowStack {
+    /// Create a new [`VMKernelShadowStack`].
+    ///
+    /// # Returns
+    ///
+    /// Initialized shadow stack & initial SSP value on success, Err(SvsmError::Mem) on error
+    pub fn new(base: VirtAddr, init: ShadowStackInit) -> Result<(Self, VirtAddr), SvsmError> {
+        let page = PageRef::new()?;
+
+        // Initialize the shadow stack.
+        let mut chunk = [0; 24];
+        let ssp = match init {
+            ShadowStackInit::Normal {
+                entry_return,
+                exit_return,
+            } => {
+                // If exit_return is None, use an invalid non-canonical address.
+                let exit_return = exit_return.unwrap_or(0xa5a5_a5a5_a5a5_a5a5);
+
+                let (token_bytes, rip_bytes) = chunk.split_at_mut(8);
+
+                // Create a shadow stack restore token.
+                let token_addr = base + PAGE_SIZE - 24;
+                let token = (token_addr + 8).bits() + shadow_stack::MODE_64BIT;
+                token_bytes.copy_from_slice(&token.to_ne_bytes());
+
+                let (entry_bytes, exit_bytes) = rip_bytes.split_at_mut(8);
+                entry_bytes.copy_from_slice(&entry_return.to_ne_bytes());
+                exit_bytes.copy_from_slice(&exit_return.to_ne_bytes());
+
+                token_addr
+            }
+            ShadowStackInit::ContextSwitch => {
+                let (_, token_bytes) = chunk.split_at_mut(16);
+
+                // Create a shadow stack restore token.
+                let token_addr = base + PAGE_SIZE - 8;
+                let token = (token_addr + 8).bits() + shadow_stack::MODE_64BIT;
+                token_bytes.copy_from_slice(&token.to_ne_bytes());
+
+                token_addr
+            }
+            ShadowStackInit::Exception => {
+                let (_, token_bytes) = chunk.split_at_mut(16);
+
+                // Create a supervisor shadow stack token.
+                let token_addr = base + PAGE_SIZE - 8;
+                let token = token_addr.bits();
+                token_bytes.copy_from_slice(&token.to_ne_bytes());
+
+                token_addr
+            }
+            ShadowStackInit::Init => base + PAGE_SIZE - 8,
+        };
+
+        page.write(PAGE_SIZE - chunk.len(), &chunk);
+
+        Ok((VMKernelShadowStack { page }, ssp))
+    }
+}
+
+impl VirtualMapping for VMKernelShadowStack {
+    fn mapping_size(&self) -> usize {
+        PAGE_SIZE
+    }
+
+    fn map(&self, offset: usize) -> Option<PhysAddr> {
+        assert_eq!(offset, 0);
+        Some(self.page.phys_addr())
+    }
+
+    fn pt_flags(&self, _offset: usize) -> PTEntryFlags {
+        // The CPU requires shadow stacks to be dirty and not writable.
+ PTEntryFlags::NX | PTEntryFlags::ACCESSED | PTEntryFlags::DIRTY + } +} diff --git a/kernel/src/mm/vm/mod.rs b/kernel/src/mm/vm/mod.rs index 78815370a..6c0809612 100644 --- a/kernel/src/mm/vm/mod.rs +++ b/kernel/src/mm/vm/mod.rs @@ -8,7 +8,8 @@ mod mapping; mod range; pub use mapping::{ - Mapping, RawAllocMapping, VMFileMapping, VMFileMappingFlags, VMKernelStack, VMMAdapter, - VMPhysMem, VMReserved, VMalloc, VirtualMapping, VMM, + Mapping, RawAllocMapping, ShadowStackInit, VMFileMapping, VMFileMappingFlags, + VMKernelShadowStack, VMKernelStack, VMMAdapter, VMPhysMem, VMReserved, VMalloc, VirtualMapping, + VMM, }; pub use range::{VMRMapping, VMR, VMR_GRANULE}; From e41469aaa263bfeba19a723a6abcb30acca2317a Mon Sep 17 00:00:00 2001 From: Tom Dohrmann Date: Thu, 12 Sep 2024 14:32:58 +0000 Subject: [PATCH 03/15] percpu: allocate an initial shadow stack This shadow stack is used when not using a task's shadow stack. Signed-off-by: Tom Dohrmann --- kernel/src/cpu/percpu.rs | 20 +++++++++++++++++++- kernel/src/mm/address_space.rs | 6 +++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index df560ab35..ad96d19c2 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -24,7 +24,8 @@ use crate::mm::vm::{ use crate::mm::{ virt_to_phys, PageBox, SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, SVSM_PERCPU_END, SVSM_PERCPU_TEMP_BASE_2M, SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, - SVSM_PERCPU_TEMP_END_4K, SVSM_PERCPU_VMSA_BASE, SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, + SVSM_PERCPU_TEMP_END_4K, SVSM_PERCPU_VMSA_BASE, SVSM_SHADOW_STACKS_INIT_TASK, + SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, }; use crate::platform::{SvsmPlatform, SVSM_PLATFORM}; use crate::sev::ghcb::{GhcbPage, GHCB}; @@ -314,6 +315,7 @@ pub struct PerCpu { hv_doorbell: Cell>, init_stack: Cell>, + init_shadow_stack: Cell>, ist: IstStacks, /// Stack boundaries of the currently running task. 
@@ -345,6 +347,7 @@ impl PerCpu { ghcb: OnceCell::new(), hv_doorbell: Cell::new(None), init_stack: Cell::new(None), + init_shadow_stack: Cell::new(None), ist: IstStacks::new(), current_stack: Cell::new(MemoryRegion::new(VirtAddr::null(), 0)), } @@ -424,6 +427,10 @@ impl PerCpu { self.init_stack.get().unwrap() } + pub fn get_top_of_shadow_stack(&self) -> VirtAddr { + self.init_shadow_stack.get().unwrap() + } + pub fn get_top_of_df_stack(&self) -> VirtAddr { self.ist.double_fault_stack.get().unwrap() } @@ -474,6 +481,13 @@ impl PerCpu { Ok(()) } + fn allocate_init_shadow_stack(&self) -> Result<(), SvsmError> { + let init_stack = + Some(self.allocate_shadow_stack(SVSM_SHADOW_STACKS_INIT_TASK, ShadowStackInit::Init)?); + self.init_shadow_stack.set(init_stack); + Ok(()) + } + fn allocate_ist_stacks(&self) -> Result<(), SvsmError> { let double_fault_stack = self.allocate_stack(SVSM_STACK_IST_DF_BASE)?; self.ist.double_fault_stack.set(Some(double_fault_stack)); @@ -582,6 +596,10 @@ impl PerCpu { // Allocate per-cpu init stack self.allocate_init_stack()?; + if cfg!(feature = "shadow-stacks") { + self.allocate_init_shadow_stack()?; + } + // Allocate IST stacks self.allocate_ist_stacks()?; diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs index 6a18b5751..705c3c88a 100644 --- a/kernel/src/mm/address_space.rs +++ b/kernel/src/mm/address_space.rs @@ -168,8 +168,12 @@ pub const SVSM_PERCPU_STACKS_BASE: VirtAddr = SVSM_PERCPU_BASE.const_add(SIZE_LE /// Stack address of the per-cpu init task pub const SVSM_STACKS_INIT_TASK: VirtAddr = SVSM_PERCPU_STACKS_BASE; +/// Shadow stack address of the per-cpu init task +pub const SVSM_SHADOW_STACKS_INIT_TASK: VirtAddr = + SVSM_STACKS_INIT_TASK.const_add(STACK_TOTAL_SIZE); + /// IST Stacks base address -pub const SVSM_STACKS_IST_BASE: VirtAddr = SVSM_STACKS_INIT_TASK.const_add(STACK_TOTAL_SIZE); +pub const SVSM_STACKS_IST_BASE: VirtAddr = SVSM_SHADOW_STACKS_INIT_TASK.const_add(PAGE_SIZE); /// DoubleFault IST stack base address pub const SVSM_STACK_IST_DF_BASE: VirtAddr = SVSM_STACKS_IST_BASE; From fd5aa4e0211718fbd85f02bb75bc471938e77365 Mon Sep 17 00:00:00 2001 From: Tom Dohrmann Date: Thu, 12 Sep 2024 14:40:55 +0000 Subject: [PATCH 04/15] percpu: setup ISST The interrupt shadow stack table (ISST) is very similar to the interrupt stack table (IST) except that it contains shadow stack addresses instead of normal stack addresses. 
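
For orientation, the table the CPU consumes is one reserved quadword
followed by seven entries, so slot n lives at byte offset 8 * n. A rough
sketch of that layout (the real type is the `Isst` struct added in
cpu/isst.rs below; the offset helper here is only illustrative):

    // Entry n is used when an IDT descriptor's IST field selects slot n,
    // so `entries[0]` corresponds to IST/ISST slot 1.
    #[repr(C)]
    struct Isst {
        _reserved: u64,
        entries: [u64; 7],
    }

    /// Byte offset of ISST slot `index` within the table.
    fn isst_slot_offset(index: u8) -> usize {
        assert!((1..=7).contains(&index), "ISST slots are numbered 1..=7");
        8 + usize::from(index - 1) * 8
    }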
Signed-off-by: Tom Dohrmann --- kernel/src/cpu/isst.rs | 18 +++++++++++++ kernel/src/cpu/mod.rs | 1 + kernel/src/cpu/percpu.rs | 47 +++++++++++++++++++++++++++++++++- kernel/src/cpu/shadow_stack.rs | 2 ++ kernel/src/mm/address_space.rs | 5 +++- 5 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 kernel/src/cpu/isst.rs diff --git a/kernel/src/cpu/isst.rs b/kernel/src/cpu/isst.rs new file mode 100644 index 000000000..896ddf47f --- /dev/null +++ b/kernel/src/cpu/isst.rs @@ -0,0 +1,18 @@ +use core::num::NonZeroU8; + +use crate::address::VirtAddr; + +#[derive(Debug, Default, Clone, Copy)] +#[repr(C)] +pub struct Isst { + _reserved: u64, + entries: [VirtAddr; 7], +} + +impl Isst { + pub fn set(&mut self, index: NonZeroU8, addr: VirtAddr) { + // ISST entries start at index 1 + let index = usize::from(index.get() - 1); + self.entries[index] = addr; + } +} diff --git a/kernel/src/cpu/mod.rs b/kernel/src/cpu/mod.rs index 68f470607..d9de99e0d 100644 --- a/kernel/src/cpu/mod.rs +++ b/kernel/src/cpu/mod.rs @@ -13,6 +13,7 @@ pub mod features; pub mod gdt; pub mod idt; pub mod irq_state; +pub mod isst; pub mod mem; pub mod msr; pub mod percpu; diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index ad96d19c2..2e9c8457a 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -7,6 +7,9 @@ extern crate alloc; use super::gdt_mut; +use super::isst::Isst; +use super::msr::write_msr; +use super::shadow_stack::ISST_ADDR; use super::tss::{X86Tss, IST_DF}; use crate::address::{Address, PhysAddr, VirtAddr}; use crate::cpu::idt::common::INT_INJ_VECTOR; @@ -25,7 +28,7 @@ use crate::mm::{ virt_to_phys, PageBox, SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, SVSM_PERCPU_END, SVSM_PERCPU_TEMP_BASE_2M, SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, SVSM_PERCPU_TEMP_END_4K, SVSM_PERCPU_VMSA_BASE, SVSM_SHADOW_STACKS_INIT_TASK, - SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, + SVSM_SHADOW_STACK_ISST_DF_BASE, SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, }; use crate::platform::{SvsmPlatform, SVSM_PLATFORM}; use crate::sev::ghcb::{GhcbPage, GHCB}; @@ -112,12 +115,14 @@ impl PerCpuAreas { #[derive(Debug)] struct IstStacks { double_fault_stack: Cell>, + double_fault_shadow_stack: Cell>, } impl IstStacks { const fn new() -> Self { IstStacks { double_fault_stack: Cell::new(None), + double_fault_shadow_stack: Cell::new(None), } } } @@ -293,6 +298,7 @@ pub struct PerCpu { pgtbl: RefCell>, tss: Cell, + isst: Cell, svsm_vmsa: OnceCell, reset_ip: Cell, /// PerCpu Virtual Memory Range @@ -329,6 +335,7 @@ impl PerCpu { pgtbl: RefCell::new(None), irq_state: IrqState::new(), tss: Cell::new(X86Tss::new()), + isst: Cell::new(Isst::default()), svsm_vmsa: OnceCell::new(), reset_ip: Cell::new(0xffff_fff0), vm_range: { @@ -435,6 +442,10 @@ impl PerCpu { self.ist.double_fault_stack.get().unwrap() } + pub fn get_top_of_df_shadow_stack(&self) -> VirtAddr { + self.ist.double_fault_shadow_stack.get().unwrap() + } + pub fn get_current_stack(&self) -> MemoryRegion { self.current_stack.get() } @@ -491,6 +502,17 @@ impl PerCpu { fn allocate_ist_stacks(&self) -> Result<(), SvsmError> { let double_fault_stack = self.allocate_stack(SVSM_STACK_IST_DF_BASE)?; self.ist.double_fault_stack.set(Some(double_fault_stack)); + + Ok(()) + } + + fn allocate_isst_shadow_stacks(&self) -> Result<(), SvsmError> { + let double_fault_shadow_stack = + self.allocate_shadow_stack(SVSM_SHADOW_STACK_ISST_DF_BASE, ShadowStackInit::Exception)?; + self.ist + .double_fault_shadow_stack + .set(Some(double_fault_shadow_stack)); + Ok(()) 
+    }
 
@@ -542,6 +564,13 @@
         self.tss.set(tss);
     }
 
+    fn setup_isst(&self) {
+        let double_fault_shadow_stack = self.get_top_of_df_shadow_stack();
+        let mut isst = self.isst.get();
+        isst.set(IST_DF, double_fault_shadow_stack);
+        self.isst.set(isst);
+    }
+
     pub fn map_self_stage2(&self) -> Result<(), SvsmError> {
         let vaddr = VirtAddr::from(ptr::from_ref(self));
         let paddr = virt_to_phys(vaddr);
@@ -606,6 +635,14 @@
         // Setup TSS
         self.setup_tss();
 
+        if cfg!(feature = "shadow-stacks") {
+            // Allocate ISST shadow stacks
+            self.allocate_isst_shadow_stacks()?;
+
+            // Setup ISST
+            self.setup_isst();
+        }
+
         // Initialize allocator for temporary mappings
         self.virt_range_init();
 
@@ -643,9 +680,17 @@
         gdt_mut().load_tss(tss);
     }
 
+    pub fn load_isst(&self) {
+        let isst = self.isst.as_ptr();
+        write_msr(ISST_ADDR, isst as u64);
+    }
+
     pub fn load(&self) {
         self.load_pgtable();
         self.load_tss();
+        if cfg!(feature = "shadow-stacks") {
+            self.load_isst();
+        }
     }
 
     pub fn set_reset_ip(&self, reset_ip: u64) {
diff --git a/kernel/src/cpu/shadow_stack.rs b/kernel/src/cpu/shadow_stack.rs
index 9127342ba..7f272198f 100644
--- a/kernel/src/cpu/shadow_stack.rs
+++ b/kernel/src/cpu/shadow_stack.rs
@@ -1,3 +1,5 @@
 // SPDX-License-Identifier: MIT OR Apache-2.0
 
+pub const ISST_ADDR: u32 = 0x6a8;
+
 pub const MODE_64BIT: usize = 1;
diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs
index 705c3c88a..99a2f3a82 100644
--- a/kernel/src/mm/address_space.rs
+++ b/kernel/src/mm/address_space.rs
@@ -177,9 +177,12 @@
 /// DoubleFault IST stack base address
 pub const SVSM_STACK_IST_DF_BASE: VirtAddr = SVSM_STACKS_IST_BASE;
 
+/// DoubleFault ISST shadow stack base address
+pub const SVSM_SHADOW_STACK_ISST_DF_BASE: VirtAddr =
+    SVSM_STACKS_IST_BASE.const_add(STACK_TOTAL_SIZE);
 
 /// PerCPU XSave Context area base address
-pub const SVSM_XSAVE_AREA_BASE: VirtAddr = SVSM_STACKS_IST_BASE.const_add(STACK_TOTAL_SIZE);
+pub const SVSM_XSAVE_AREA_BASE: VirtAddr = SVSM_SHADOW_STACK_ISST_DF_BASE.const_add(PAGE_SIZE);
 
 /// Base Address for temporary mappings - used by page-table guards
 pub const SVSM_PERCPU_TEMP_BASE: VirtAddr = SVSM_PERCPU_BASE.const_add(SIZE_LEVEL2);

From f2a4429ec609f225afa751e1a45af050a3fd94a9 Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Thu, 12 Sep 2024 14:47:45 +0000
Subject: [PATCH 05/15] task: allocate shadow stacks for each task

Each task needs a normal shadow stack and a shadow stack used for
exception handling.
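
For the normal shadow stack, `ShadowStackInit::Normal` leaves the top of
the page looking like this (a sketch of the 24-byte chunk written by
`VMKernelShadowStack::new` earlier in the series; PAGE_SIZE and
MODE_64BIT stand in for the kernel's definitions):

    const PAGE_SIZE: usize = 0x1000;
    const MODE_64BIT: usize = 1;

    /// (address, value) pairs for the three initialized top-of-page slots.
    fn normal_init_layout(
        base: usize,
        entry_return: usize,
        exit_return: usize,
    ) -> [(usize, usize); 3] {
        let token_addr = base + PAGE_SIZE - 24;
        [
            // Restore token; consumed by the first switch to this task.
            (token_addr, (token_addr + 8) + MODE_64BIT),
            // Popped by the `ret` that enters the task.
            (token_addr + 8, entry_return),
            // Used if the task entry function ever returns.
            (token_addr + 16, exit_return),
        ]
    }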
Signed-off-by: Tom Dohrmann --- kernel/src/cpu/shadow_stack.rs | 1 + kernel/src/mm/address_space.rs | 10 ++++- kernel/src/task/schedule.rs | 5 +++ kernel/src/task/tasks.rs | 74 ++++++++++++++++++++++++++++++++-- 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/kernel/src/cpu/shadow_stack.rs b/kernel/src/cpu/shadow_stack.rs index 7f272198f..11b77c607 100644 --- a/kernel/src/cpu/shadow_stack.rs +++ b/kernel/src/cpu/shadow_stack.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 +pub const PL0_SSP: u32 = 0x6a4; pub const ISST_ADDR: u32 = 0x6a8; pub const MODE_64BIT: usize = 1; diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs index 99a2f3a82..205cab349 100644 --- a/kernel/src/mm/address_space.rs +++ b/kernel/src/mm/address_space.rs @@ -209,9 +209,17 @@ pub const SVSM_PERTASK_END: VirtAddr = SVSM_PERTASK_BASE.const_add(SIZE_LEVEL3); /// Kernel stack for a task pub const SVSM_PERTASK_STACK_BASE: VirtAddr = SVSM_PERTASK_BASE; +/// Kernel shadow stack for normal execution of a task +pub const SVSM_PERTASK_SHADOW_STACK_BASE: VirtAddr = + SVSM_PERTASK_STACK_BASE.const_add(STACK_TOTAL_SIZE); + +/// Kernel shadow stack for exception handling +pub const SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE: VirtAddr = + SVSM_PERTASK_SHADOW_STACK_BASE.const_add(PAGE_SIZE); + /// SSE context save area for a task pub const SVSM_PERTASK_XSAVE_AREA_BASE: VirtAddr = - SVSM_PERTASK_STACK_BASE.const_add(STACK_TOTAL_SIZE); + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE.const_add(PAGE_SIZE); /// Page table self-map level 3 index pub const PGTABLE_LVL3_IDX_PTE_SELFMAP: usize = 493; diff --git a/kernel/src/task/schedule.rs b/kernel/src/task/schedule.rs index aedbe86fb..b70f3cab8 100644 --- a/kernel/src/task/schedule.rs +++ b/kernel/src/task/schedule.rs @@ -33,7 +33,9 @@ extern crate alloc; use super::INITIAL_TASK_ID; use super::{Task, TaskListAdapter, TaskPointer, TaskRunListAdapter}; use crate::address::Address; +use crate::cpu::msr::write_msr; use crate::cpu::percpu::{irq_nesting_count, this_cpu}; +use crate::cpu::shadow_stack::PL0_SSP; use crate::cpu::sse::sse_restore_context; use crate::cpu::sse::sse_save_context; use crate::cpu::IrqGuard; @@ -351,6 +353,9 @@ pub fn schedule() { } this_cpu().set_tss_rsp0(next.stack_bounds.end()); + if cfg!(feature = "shadow-stacks") { + write_msr(PL0_SSP, next.exception_shadow_stack.bits() as u64); + } // Get task-pointers, consuming the Arcs and release their reference unsafe { diff --git a/kernel/src/task/tasks.rs b/kernel/src/task/tasks.rs index 53c4e7108..908b02f72 100644 --- a/kernel/src/task/tasks.rs +++ b/kernel/src/task/tasks.rs @@ -23,11 +23,13 @@ use crate::error::SvsmError; use crate::fs::FileHandle; use crate::locking::{RWLock, SpinLock}; use crate::mm::pagetable::{PTEntryFlags, PageTable}; -use crate::mm::vm::{Mapping, VMFileMappingFlags, VMKernelStack, VMR}; -use crate::mm::PageBox; +use crate::mm::vm::{ + Mapping, ShadowStackInit, VMFileMappingFlags, VMKernelShadowStack, VMKernelStack, VMR, +}; use crate::mm::{ - mappings::create_anon_mapping, mappings::create_file_mapping, VMMappingGuard, - SVSM_PERTASK_BASE, SVSM_PERTASK_END, SVSM_PERTASK_STACK_BASE, USER_MEM_END, USER_MEM_START, + mappings::create_anon_mapping, mappings::create_file_mapping, PageBox, VMMappingGuard, + SVSM_PERTASK_BASE, SVSM_PERTASK_END, SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE, + SVSM_PERTASK_SHADOW_STACK_BASE, SVSM_PERTASK_STACK_BASE, USER_MEM_END, USER_MEM_START, }; use crate::syscall::{Obj, ObjError, ObjHandle}; use crate::types::{SVSM_USER_CS, 
SVSM_USER_DS}; @@ -119,11 +121,15 @@ impl TaskSchedState { pub struct Task { pub rsp: u64, + pub ssp: VirtAddr, + /// XSave area pub xsa: PageBox<[u8]>, pub stack_bounds: MemoryRegion, + pub exception_shadow_stack: VirtAddr, + /// Page table that is loaded when the task is scheduled pub page_table: SpinLock>, @@ -187,6 +193,34 @@ impl Task { let xsa = Self::allocate_xsave_area(); let xsa_addr = u64::from(xsa.vaddr()) as usize; + + let mut shadow_stack_offset = VirtAddr::null(); + let mut exception_shadow_stack = VirtAddr::null(); + if cfg!(feature = "shadow-stacks") { + let shadow_stack; + (shadow_stack, shadow_stack_offset) = VMKernelShadowStack::new( + SVSM_PERTASK_SHADOW_STACK_BASE, + ShadowStackInit::Normal { + entry_return: run_kernel_task as usize, + exit_return: Some(task_exit as usize), + }, + )?; + vm_kernel_range.insert_at( + SVSM_PERTASK_SHADOW_STACK_BASE, + Arc::new(Mapping::new(shadow_stack)), + )?; + + let shadow_stack; + (shadow_stack, exception_shadow_stack) = VMKernelShadowStack::new( + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE, + ShadowStackInit::Exception, + )?; + vm_kernel_range.insert_at( + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE, + Arc::new(Mapping::new(shadow_stack)), + )?; + } + let (stack, raw_bounds, rsp_offset) = Self::allocate_ktask_stack(cpu, entry, xsa_addr)?; vm_kernel_range.insert_at(SVSM_PERTASK_STACK_BASE, stack)?; @@ -204,8 +238,10 @@ impl Task { .checked_sub(rsp_offset) .expect("Invalid stack offset from task::allocate_ktask_stack()") .bits() as u64, + ssp: shadow_stack_offset, xsa, stack_bounds: bounds, + exception_shadow_stack, page_table: SpinLock::new(pgtable), vm_kernel_range, vm_user_range: None, @@ -231,6 +267,34 @@ impl Task { let xsa = Self::allocate_xsave_area(); let xsa_addr = u64::from(xsa.vaddr()) as usize; + + let mut shadow_stack_offset = VirtAddr::null(); + let mut exception_shadow_stack = VirtAddr::null(); + if cfg!(feature = "shadow-stacks") { + let shadow_stack; + (shadow_stack, shadow_stack_offset) = VMKernelShadowStack::new( + SVSM_PERTASK_SHADOW_STACK_BASE, + ShadowStackInit::Normal { + entry_return: return_new_task as usize, + exit_return: None, + }, + )?; + vm_kernel_range.insert_at( + SVSM_PERTASK_SHADOW_STACK_BASE, + Arc::new(Mapping::new(shadow_stack)), + )?; + + let shadow_stack; + (shadow_stack, exception_shadow_stack) = VMKernelShadowStack::new( + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE, + ShadowStackInit::Exception, + )?; + vm_kernel_range.insert_at( + SVSM_PERTASK_EXCEPTION_SHADOW_STACK_BASE, + Arc::new(Mapping::new(shadow_stack)), + )?; + } + let (stack, raw_bounds, stack_offset) = Self::allocate_utask_stack(cpu, user_entry, xsa_addr)?; vm_kernel_range.insert_at(SVSM_PERTASK_STACK_BASE, stack)?; @@ -252,8 +316,10 @@ impl Task { .checked_sub(stack_offset) .expect("Invalid stack offset from task::allocate_utask_stack()") .bits() as u64, + ssp: shadow_stack_offset, xsa, stack_bounds: bounds, + exception_shadow_stack, page_table: SpinLock::new(pgtable), vm_kernel_range, vm_user_range: Some(vm_user_range), From dae1da48658749bc68e21ca5db876cf81dc90b53 Mon Sep 17 00:00:00 2001 From: Tom Dohrmann Date: Thu, 12 Sep 2024 14:52:02 +0000 Subject: [PATCH 06/15] idt: add shadow stack pointer to exception context Some exception handlers will need to update the shadow stack, so they need to know the shadow stack pointer at the time of the exception. 
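
With the two extra slots pushed on entry, the save area laid out by the
entry macro looks as follows; this is why the CS and EFLAGS offsets in
entry.S move from 17*8/18*8 to 19*8/20*8. A sketch with explicit slot
offsets (the real definition is `X86ExceptionContext`, built from
`X86GeneralRegs` and `X86InterruptFrame`):

    #[repr(C, packed)]
    struct SaveArea {
        ssp: u64,        // 0*8: shadow stack pointer, pushed last
        _padding: u64,   // 1*8: keeps the frame layout stable
        gprs: [u64; 15], // 2*8..=16*8: r15 down to rax
        error_code: u64, // 17*8
        rip: u64,        // 18*8
        cs: u64,         // 19*8: checked by `testb $3, 19*8(%rsp)`
        rflags: u64,     // 20*8: checked by `testl $IF, 20*8(%rsp)`
        rsp: u64,        // 21*8
        ss: u64,         // 22*8
    }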
Signed-off-by: Tom Dohrmann --- kernel/src/cpu/idt/common.rs | 2 ++ kernel/src/cpu/idt/entry.S | 52 ++++++++++++++++++++++-------------- kernel/src/cpu/idt/svsm.rs | 4 +++ 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs index 3a1754060..f9469ef96 100644 --- a/kernel/src/cpu/idt/common.rs +++ b/kernel/src/cpu/idt/common.rs @@ -61,6 +61,8 @@ bitflags::bitflags! { #[repr(C, packed)] #[derive(Default, Debug, Clone, Copy)] pub struct X86ExceptionContext { + pub ssp: u64, + _padding: [u8; 8], pub regs: X86GeneralRegs, pub error_code: usize, pub frame: X86InterruptFrame, diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S index cfd04c37f..fb069db6c 100644 --- a/kernel/src/cpu/idt/entry.S +++ b/kernel/src/cpu/idt/entry.S @@ -29,6 +29,12 @@ HV_DOORBELL_ADDR: pushq %r13 pushq %r14 pushq %r15 + pushq $0 + # rdsspq is a nop when shadow stacks are not supported. Make sure that + # rax is 0 if that's the case. + xorl %eax, %eax + rdsspq %rax + pushq %rax .endm .macro default_entry_no_ist name: req handler:req error_code:req vector:req @@ -141,7 +147,7 @@ hv_not_vmpl_switch: // required. This could not be performed before the RIP check because // the previous RIP determines where to find the previous EFLAGS.IF // value on the stack. - testl ${IF}, 18*8(%rcx) + testl ${IF}, 20*8(%rcx) jz postpone_hv // Switch to the stack pointer from the previous exception, which // points to the register save area, and continue with #HV @@ -206,6 +212,12 @@ continue_hv: pushq %r13 pushq %r14 pushq %r15 + pushq $0 + # rdsspq is a nop when shadow stacks are not supported. Make sure that + # rdx is 0 if that's the case. + xorl %edx, %edx + rdsspq %rdx + pushq %rdx handle_as_hv: // Load the address of the #HV doorbell page. The global address // might not yet be configured, and the per-CPU page might also not @@ -225,13 +237,13 @@ handle_as_hv_with_doorbell: default_return: // Ensure that interrupts are disabled before attempting any return. cli - testb $3, 17*8(%rsp) // Check CS in exception frame + testb $3, 19*8(%rsp) // Check CS in exception frame jnz return_user return_all_paths: // If interrupts were previously available, then check whether any #HV // events are pending. If so, proceed as if the original trap was // #HV. - testl ${IF}, 18*8(%rsp) // check EFLAGS.IF in exception frame + testl ${IF}, 20*8(%rsp) // check EFLAGS.IF in exception frame jz begin_iret_return movq HV_DOORBELL_ADDR(%rip), %rdi test %rdi, %rdi @@ -251,23 +263,23 @@ iret_return_window: begin_iret_return: // Reload registers without modifying the stack pointer so that if #HV // occurs within this window, the saved registers are still intact. 
-    movq    0*8(%rsp), %r15
-    movq    1*8(%rsp), %r14
-    movq    2*8(%rsp), %r13
-    movq    3*8(%rsp), %r12
-    movq    4*8(%rsp), %r11
-    movq    5*8(%rsp), %r10
-    movq    6*8(%rsp), %r9
-    movq    7*8(%rsp), %r8
-    movq    8*8(%rsp), %rbp
-    movq    9*8(%rsp), %rdi
-    movq    10*8(%rsp), %rsi
-    movq    11*8(%rsp), %rdx
-    movq    12*8(%rsp), %rcx
-    movq    13*8(%rsp), %rbx
-    movq    14*8(%rsp), %rax
-
-    addq    $16*8, %rsp
+    movq    2*8(%rsp), %r15
+    movq    3*8(%rsp), %r14
+    movq    4*8(%rsp), %r13
+    movq    5*8(%rsp), %r12
+    movq    6*8(%rsp), %r11
+    movq    7*8(%rsp), %r10
+    movq    8*8(%rsp), %r9
+    movq    9*8(%rsp), %r8
+    movq    10*8(%rsp), %rbp
+    movq    11*8(%rsp), %rdi
+    movq    12*8(%rsp), %rsi
+    movq    13*8(%rsp), %rdx
+    movq    14*8(%rsp), %rcx
+    movq    15*8(%rsp), %rbx
+    movq    16*8(%rsp), %rax
+
+    addq    $18*8, %rsp
 
 default_iret:
         iretq
 
diff --git a/kernel/src/cpu/idt/svsm.rs b/kernel/src/cpu/idt/svsm.rs
index bcce012bb..120b402a5 100644
--- a/kernel/src/cpu/idt/svsm.rs
+++ b/kernel/src/cpu/idt/svsm.rs
@@ -280,6 +280,10 @@ global_asm!(
     .set const_true, 1
     "#,
     concat!(".set CFG_NOSMAP, const_", cfg!(feature = "nosmap")),
+    concat!(
+        ".set CFG_SHADOW_STACKS, const_",
+        cfg!(feature = "shadow-stacks")
+    ),
     include_str!("../x86/smap.S"),
     include_str!("entry.S"),
     IF = const RFlags::IF.bits(),

From e29672133dfd424bb4348067ca0609439962fb93 Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Thu, 12 Sep 2024 14:54:07 +0000
Subject: [PATCH 07/15] idt: update return address on shadow stack

Whenever we update the return address on the stack, we'll also need to
update the return address on the shadow stack.

Signed-off-by: Tom Dohrmann
---
 kernel/src/cpu/extable.rs    |  2 +-
 kernel/src/cpu/idt/common.rs | 19 +++++++++++++++++++
 kernel/src/cpu/vc.rs         |  3 ++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/kernel/src/cpu/extable.rs b/kernel/src/cpu/extable.rs
index e8925ad8e..3eaa565a6 100644
--- a/kernel/src/cpu/extable.rs
+++ b/kernel/src/cpu/extable.rs
@@ -74,7 +74,7 @@ pub fn handle_exception_table(ctx: &mut X86ExceptionContext) -> bool {
     // If an exception hit in an area covered by the exception table, set rcx to -1
     if new_rip != ex_rip {
         ctx.regs.rcx = !0usize;
-        ctx.frame.rip = new_rip.bits();
+        ctx.set_rip(new_rip.bits());
         return true;
     }
 
diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs
index f9469ef96..a97d7e29c 100644
--- a/kernel/src/cpu/idt/common.rs
+++ b/kernel/src/cpu/idt/common.rs
@@ -68,6 +68,25 @@ pub struct X86ExceptionContext {
     pub frame: X86InterruptFrame,
 }
 
+impl X86ExceptionContext {
+    pub fn set_rip(&mut self, new_rip: usize) {
+        self.frame.rip = new_rip;
+
+        if cfg!(feature = "shadow-stacks") {
+            // Update the instruction pointer on the shadow stack.
+            let return_on_stack = (self.ssp + 8) as *const usize;
+            let return_on_stack_val = new_rip;
+            unsafe {
+                asm!(
+                    "wrssq [{}], {}",
+                    in(reg) return_on_stack,
+                    in(reg) return_on_stack_val
+                );
+            }
+        }
+    }
+}
+
 impl InsnMachineCtx for X86ExceptionContext {
     fn read_efer(&self) -> u64 {
         read_efer().bits()
diff --git a/kernel/src/cpu/vc.rs b/kernel/src/cpu/vc.rs
index c15afe4fd..9dd81a54e 100644
--- a/kernel/src/cpu/vc.rs
+++ b/kernel/src/cpu/vc.rs
@@ -234,7 +234,8 @@ fn snp_cpuid(ctx: &mut X86ExceptionContext) -> Result<(), SvsmError> {
 }
 
 fn vc_finish_insn(ctx: &mut X86ExceptionContext, insn_ctx: &Option<DecodedInsnCtx>) {
-    ctx.frame.rip += insn_ctx.as_ref().map_or(0, |d| d.size())
+    let new_rip = ctx.frame.rip + insn_ctx.as_ref().map_or(0, |d| d.size());
+    ctx.set_rip(new_rip);
 }
 
 fn ioio_get_port(source: Operand, ctx: &X86ExceptionContext) -> u16 {

From 5d21228063d704c8d96c8f3b755e1a7465b5547f Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Wed, 25 Sep 2024 09:31:46 +0000
Subject: [PATCH 08/15] idt: fixup shadow stack in #HV handler

The #HV handler messes with the stack frame, so the shadow stack needs
to be adjusted accordingly.

Signed-off-by: Tom Dohrmann
---
 kernel/src/cpu/idt/entry.S | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S
index fb069db6c..8d1681af8 100644
--- a/kernel/src/cpu/idt/entry.S
+++ b/kernel/src/cpu/idt/entry.S
@@ -113,6 +113,11 @@ asm_entry_hv:
     // RIP is in the return window, so update RIP to the cancel point.
     leaq switch_vmpl_cancel(%rip), %rbx
     movq %rbx, 0x20(%rsp)
+    .if CFG_SHADOW_STACKS
+    // Update RIP on the shadow stack to the cancel point.
+    rdsspq %rax
+    wrssq %rbx, 8(%rax)
+    .endif
     // Defer any further processing until interrupts can be processed.
     jmp postpone_hv
 hv_not_vmpl_switch:
@@ -153,6 +158,10 @@ hv_not_vmpl_switch:
     // points to the register save area, and continue with #HV
     // processing.
     movq %rcx, %rsp
+    // Pop the current stack frame, so that the previous stack frame sits
+    // on top of the shadow stack.
+    movl $3, %eax
+    incsspq %rax
     jmp handle_as_hv
 
 postpone_hv:
@@ -195,6 +204,12 @@ restart_hv:
     movq 2*8(%rsp), %rax
     movq %rax, -2*8(%rcx)
     leaq -4*8(%rcx), %rsp
+    .if CFG_SHADOW_STACKS
+    // Pop the current stack frame, so that the previous stack frame sits
+    // on top of the shadow stack.
+    movl $3, %eax
+    incsspq %rax
+    .endif
 
 continue_hv:
     // At this point, only the dummy error code and first three registers

From 6029dcec35d1ba52e19f37599767138b2948d527 Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Fri, 25 Oct 2024 11:03:05 +0000
Subject: [PATCH 09/15] address: add VirtAddr::as_usize

Unlike the various From and Into implementations, this method can be
called in const contexts.

Signed-off-by: Tom Dohrmann
---
 kernel/src/address.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/src/address.rs b/kernel/src/address.rs
index 9936d2950..418fb5281 100644
--- a/kernel/src/address.rs
+++ b/kernel/src/address.rs
@@ -227,6 +227,11 @@ impl VirtAddr {
         self.0 as *mut T
     }
 
+    #[inline]
+    pub const fn as_usize(&self) -> usize {
+        self.0
+    }
+
     /// Converts the `VirtAddr` to a reference to the given type, checking
     /// that the address is not NULL and properly aligned.
     ///

From a448d45051e96d84c4752b6a956cd876034f0ab8 Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Wed, 18 Sep 2024 09:49:21 +0000
Subject: [PATCH 10/15] schedule: switch to special stack during context
 switches

We need to guard against IRQs coming in after switching to the new page
tables and before switching to the new stack.
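
The stack's address has to be baked into the context-switch assembly as
a `const` operand, which is what `VirtAddr::as_usize` from the previous
patch enables. A minimal sketch of the pattern (the address and label
here are stand-ins, not the kernel's real layout):

    use core::arch::global_asm;

    #[derive(Clone, Copy)]
    struct VirtAddr(usize);

    impl VirtAddr {
        // Unlike From/Into conversions, this is callable in const contexts.
        const fn as_usize(&self) -> usize {
            self.0
        }
    }

    // Illustrative address only.
    const CONTEXT_SWITCH_STACK: VirtAddr = VirtAddr(0xffff_8000_0010_0000);

    global_asm!(
        "context_switch_stack_top: .quad {STACK}",
        STACK = const CONTEXT_SWITCH_STACK.as_usize(),
    );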
Signed-off-by: Tom Dohrmann --- kernel/src/cpu/percpu.rs | 12 ++++++++++-- kernel/src/mm/address_space.rs | 5 ++++- kernel/src/task/schedule.rs | 17 ++++++++++++++++- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index 2e9c8457a..c35f6af5a 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -25,8 +25,8 @@ use crate::mm::vm::{ VMReserved, VMR, }; use crate::mm::{ - virt_to_phys, PageBox, SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, SVSM_PERCPU_END, - SVSM_PERCPU_TEMP_BASE_2M, SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, + virt_to_phys, PageBox, SVSM_CONTEXT_SWITCH_STACK, SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, + SVSM_PERCPU_END, SVSM_PERCPU_TEMP_BASE_2M, SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, SVSM_PERCPU_TEMP_END_4K, SVSM_PERCPU_VMSA_BASE, SVSM_SHADOW_STACKS_INIT_TASK, SVSM_SHADOW_STACK_ISST_DF_BASE, SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, }; @@ -499,6 +499,11 @@ impl PerCpu { Ok(()) } + fn allocate_context_switch_stack(&self) -> Result<(), SvsmError> { + self.allocate_stack(SVSM_CONTEXT_SWITCH_STACK)?; + Ok(()) + } + fn allocate_ist_stacks(&self) -> Result<(), SvsmError> { let double_fault_stack = self.allocate_stack(SVSM_STACK_IST_DF_BASE)?; self.ist.double_fault_stack.set(Some(double_fault_stack)); @@ -629,6 +634,9 @@ impl PerCpu { self.allocate_init_shadow_stack()?; } + // Allocate per-cpu context switch stack + self.allocate_context_switch_stack()?; + // Allocate IST stacks self.allocate_ist_stacks()?; diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs index 205cab349..ec4859a6e 100644 --- a/kernel/src/mm/address_space.rs +++ b/kernel/src/mm/address_space.rs @@ -172,8 +172,11 @@ pub const SVSM_STACKS_INIT_TASK: VirtAddr = SVSM_PERCPU_STACKS_BASE; pub const SVSM_SHADOW_STACKS_INIT_TASK: VirtAddr = SVSM_STACKS_INIT_TASK.const_add(STACK_TOTAL_SIZE); +/// Stack address to use during context switches +pub const SVSM_CONTEXT_SWITCH_STACK: VirtAddr = SVSM_SHADOW_STACKS_INIT_TASK.const_add(PAGE_SIZE); + /// IST Stacks base address -pub const SVSM_STACKS_IST_BASE: VirtAddr = SVSM_SHADOW_STACKS_INIT_TASK.const_add(PAGE_SIZE); +pub const SVSM_STACKS_IST_BASE: VirtAddr = SVSM_CONTEXT_SWITCH_STACK.const_add(STACK_TOTAL_SIZE); /// DoubleFault IST stack base address pub const SVSM_STACK_IST_DF_BASE: VirtAddr = SVSM_STACKS_IST_BASE; diff --git a/kernel/src/task/schedule.rs b/kernel/src/task/schedule.rs index b70f3cab8..f4b70503d 100644 --- a/kernel/src/task/schedule.rs +++ b/kernel/src/task/schedule.rs @@ -32,7 +32,7 @@ extern crate alloc; use super::INITIAL_TASK_ID; use super::{Task, TaskListAdapter, TaskPointer, TaskRunListAdapter}; -use crate::address::Address; +use crate::address::{Address, VirtAddr}; use crate::cpu::msr::write_msr; use crate::cpu::percpu::{irq_nesting_count, this_cpu}; use crate::cpu::shadow_stack::PL0_SSP; @@ -41,6 +41,7 @@ use crate::cpu::sse::sse_save_context; use crate::cpu::IrqGuard; use crate::error::SvsmError; use crate::locking::SpinLock; +use crate::mm::{STACK_TOTAL_SIZE, SVSM_CONTEXT_SWITCH_STACK}; use alloc::sync::Arc; use core::arch::{asm, global_asm}; use core::cell::OnceCell; @@ -414,6 +415,9 @@ global_asm!( jz 1f movq %rsp, {TASK_RSP_OFFSET}(%rsi) + // Switch to a stack pointer that's valid in both the old and new page tables. 
+ mov ${CONTEXT_SWITCH_STACK}, %rsp + 1: // Switch to the new task state mov %rdx, %cr3 @@ -445,5 +449,16 @@ global_asm!( ret "#, TASK_RSP_OFFSET = const offset_of!(Task, rsp), + CONTEXT_SWITCH_STACK = const CONTEXT_SWITCH_STACK.as_usize(), options(att_syntax) ); + +/// The location of a cpu-local stack that's mapped into every set of page +/// tables for use during context switches. +/// +/// If an IRQ is raised after switching the page tables but before switching +/// to the new stack, the CPU will try to access the old stack in the new page +/// tables. To protect against this, we switch to another stack that's mapped +/// into both the old and the new set of page tables. That way we always have a +/// valid stack to handle exceptions on. +const CONTEXT_SWITCH_STACK: VirtAddr = SVSM_CONTEXT_SWITCH_STACK.const_add(STACK_TOTAL_SIZE); From 7c4b93fdfbbdd57aa103245f1977150d0c21ca66 Mon Sep 17 00:00:00 2001 From: Tom Dohrmann Date: Thu, 12 Sep 2024 15:02:13 +0000 Subject: [PATCH 11/15] schedule: switch shadow stacks in context switch Each task has separate shadow stacks, so we need to switch them when switching tasks. Signed-off-by: Tom Dohrmann --- kernel/src/cpu/percpu.rs | 21 +++++++-- kernel/src/mm/address_space.rs | 6 ++- kernel/src/task/schedule.rs | 78 +++++++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index c35f6af5a..97c45fef0 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -25,10 +25,11 @@ use crate::mm::vm::{ VMReserved, VMR, }; use crate::mm::{ - virt_to_phys, PageBox, SVSM_CONTEXT_SWITCH_STACK, SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, - SVSM_PERCPU_END, SVSM_PERCPU_TEMP_BASE_2M, SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, - SVSM_PERCPU_TEMP_END_4K, SVSM_PERCPU_VMSA_BASE, SVSM_SHADOW_STACKS_INIT_TASK, - SVSM_SHADOW_STACK_ISST_DF_BASE, SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, + virt_to_phys, PageBox, SVSM_CONTEXT_SWITCH_SHADOW_STACK, SVSM_CONTEXT_SWITCH_STACK, + SVSM_PERCPU_BASE, SVSM_PERCPU_CAA_BASE, SVSM_PERCPU_END, SVSM_PERCPU_TEMP_BASE_2M, + SVSM_PERCPU_TEMP_BASE_4K, SVSM_PERCPU_TEMP_END_2M, SVSM_PERCPU_TEMP_END_4K, + SVSM_PERCPU_VMSA_BASE, SVSM_SHADOW_STACKS_INIT_TASK, SVSM_SHADOW_STACK_ISST_DF_BASE, + SVSM_STACKS_INIT_TASK, SVSM_STACK_IST_DF_BASE, }; use crate::platform::{SvsmPlatform, SVSM_PLATFORM}; use crate::sev::ghcb::{GhcbPage, GHCB}; @@ -504,6 +505,14 @@ impl PerCpu { Ok(()) } + fn allocate_context_switch_shadow_stack(&self) -> Result<(), SvsmError> { + self.allocate_shadow_stack( + SVSM_CONTEXT_SWITCH_SHADOW_STACK, + ShadowStackInit::ContextSwitch, + )?; + Ok(()) + } + fn allocate_ist_stacks(&self) -> Result<(), SvsmError> { let double_fault_stack = self.allocate_stack(SVSM_STACK_IST_DF_BASE)?; self.ist.double_fault_stack.set(Some(double_fault_stack)); @@ -637,6 +646,10 @@ impl PerCpu { // Allocate per-cpu context switch stack self.allocate_context_switch_stack()?; + if cfg!(feature = "shadow-stacks") { + self.allocate_context_switch_shadow_stack()?; + } + // Allocate IST stacks self.allocate_ist_stacks()?; diff --git a/kernel/src/mm/address_space.rs b/kernel/src/mm/address_space.rs index ec4859a6e..1cc444237 100644 --- a/kernel/src/mm/address_space.rs +++ b/kernel/src/mm/address_space.rs @@ -175,8 +175,12 @@ pub const SVSM_SHADOW_STACKS_INIT_TASK: VirtAddr = /// Stack address to use during context switches pub const SVSM_CONTEXT_SWITCH_STACK: VirtAddr = SVSM_SHADOW_STACKS_INIT_TASK.const_add(PAGE_SIZE); +/// Shadow stack address to 
use during context switches +pub const SVSM_CONTEXT_SWITCH_SHADOW_STACK: VirtAddr = + SVSM_CONTEXT_SWITCH_STACK.const_add(STACK_TOTAL_SIZE); + /// IST Stacks base address -pub const SVSM_STACKS_IST_BASE: VirtAddr = SVSM_CONTEXT_SWITCH_STACK.const_add(STACK_TOTAL_SIZE); +pub const SVSM_STACKS_IST_BASE: VirtAddr = SVSM_CONTEXT_SWITCH_SHADOW_STACK.const_add(PAGE_SIZE); /// DoubleFault IST stack base address pub const SVSM_STACK_IST_DF_BASE: VirtAddr = SVSM_STACKS_IST_BASE; diff --git a/kernel/src/task/schedule.rs b/kernel/src/task/schedule.rs index f4b70503d..949142e61 100644 --- a/kernel/src/task/schedule.rs +++ b/kernel/src/task/schedule.rs @@ -41,7 +41,7 @@ use crate::cpu::sse::sse_save_context; use crate::cpu::IrqGuard; use crate::error::SvsmError; use crate::locking::SpinLock; -use crate::mm::{STACK_TOTAL_SIZE, SVSM_CONTEXT_SWITCH_STACK}; +use crate::mm::{STACK_TOTAL_SIZE, SVSM_CONTEXT_SWITCH_SHADOW_STACK, SVSM_CONTEXT_SWITCH_STACK}; use alloc::sync::Arc; use core::arch::{asm, global_asm}; use core::cell::OnceCell; @@ -387,6 +387,13 @@ pub fn schedule_task(task: TaskPointer) { } global_asm!( + // Make the value of the `shadow-stacks` feature usable in assembly. + ".set const_false, 0", + ".set const_true, 1", + concat!( + ".set CFG_SHADOW_STACKS, const_", + cfg!(feature = "shadow-stacks") + ), r#" .text @@ -410,18 +417,42 @@ global_asm!( pushq %r15 pushq %rsp - // Save the current stack pointer + // If `prev` is not null... testq %rsi, %rsi jz 1f + + // Save the current stack pointer movq %rsp, {TASK_RSP_OFFSET}(%rsi) // Switch to a stack pointer that's valid in both the old and new page tables. mov ${CONTEXT_SWITCH_STACK}, %rsp + .if CFG_SHADOW_STACKS + // Save the current shadow stack pointer + rdssp %rax + sub $8, %rax + movq %rax, {TASK_SSP_OFFSET}(%rsi) + // Switch to a shadow stack that's valid in both page tables and move + // the "shadow stack restore token" to the old shadow stack. + mov ${CONTEXT_SWITCH_RESTORE_TOKEN}, %rax + rstorssp (%rax) + saveprevssp + .endif + 1: // Switch to the new task state + + // Switch to the new task page tables mov %rdx, %cr3 + .if CFG_SHADOW_STACKS + // Switch to the new task shadow stack and move the "shadow stack + // restore token" back. + mov {TASK_SSP_OFFSET}(%rdi), %rdx + rstorssp (%rdx) + saveprevssp + .endif + // Switch to the new task stack movq {TASK_RSP_OFFSET}(%rdi), %rsp @@ -449,7 +480,9 @@ global_asm!( ret "#, TASK_RSP_OFFSET = const offset_of!(Task, rsp), + TASK_SSP_OFFSET = const offset_of!(Task, ssp), CONTEXT_SWITCH_STACK = const CONTEXT_SWITCH_STACK.as_usize(), + CONTEXT_SWITCH_RESTORE_TOKEN = const CONTEXT_SWITCH_RESTORE_TOKEN.as_usize(), options(att_syntax) ); @@ -462,3 +495,44 @@ global_asm!( /// into both the old and the new set of page tables. That way we always have a /// valid stack to handle exceptions on. const CONTEXT_SWITCH_STACK: VirtAddr = SVSM_CONTEXT_SWITCH_STACK.const_add(STACK_TOTAL_SIZE); + +/// The location of a cpu-local shadow stack restore token that's mapped into +/// every set of page tables for use during context switches. +/// +/// One interesting difference between the normal stack pointer and the shadow +/// stack pointer is how they can be switched: For the normal stack pointer we +/// can just move a new value into the RSP register. This doesn't work for the +/// SSP register (the shadow stack pointer) because there's no way to directly +/// move a value into it. Instead we have to use the `rstorssp` instruction. 
+/// The key difference between this instruction and a regular `mov` is that
+/// `rstorssp` expects a "shadow stack restore token" to be at the top of the
+/// new shadow stack (this is just a special value that marks the top of an
+/// inactive shadow stack). After switching to a new shadow stack, the previous
+/// shadow stack is now inactive, and so the `saveprevssp` instruction can be
+/// used to transfer the shadow stack restore token from the new shadow stack
+/// to the previous one: `saveprevssp` atomically pops the stack token of the
+/// new shadow stack and pushes it on the previous shadow stack. This means
+/// that we have to execute both `rstorssp` and `saveprevssp` every time we
+/// want to switch the shadow stacks.
+///
+/// There's one major problem though: `saveprevssp` needs to access both the
+/// previous and the new shadow stack, but we only map each shadow stack into a
+/// single task's page tables. If each set of page tables only has access to
+/// either the previous or the new shadow stack, but not both, we can't execute
+/// `saveprevssp` and so we can't move the shadow stack restore token to the
+/// previous shadow stack. If there's no shadow stack restore token on the
+/// previous shadow stack that means we can't restore this shadow stack at a
+/// later point. To work around this, we map another shadow stack into each
+/// CPU's set of pagetables. This allows us to do the following:
+///
+/// 1. Switch to the context-switch shadow stack using `rstorssp`.
+/// 2. Transfer the shadow stack restore token from the context switch shadow
+///    stack to the previous shadow stack by executing `saveprevssp`.
+/// 3. Switch the page tables. This doesn't lead to problems with the context
+///    switch shadow stack because it's mapped into both page tables.
+/// 4. Switch to the new shadow stack using `rstorssp`.
+/// 5. Transfer the shadow stack restore token from the new shadow stack back
+///    to the context switch shadow stack by executing `saveprevssp`.
+///
+/// We just switched between two shadow stacks in different page tables :)
+const CONTEXT_SWITCH_RESTORE_TOKEN: VirtAddr = SVSM_CONTEXT_SWITCH_SHADOW_STACK.const_add(0xff8);

From 34ac60bf9a9756899e5873285a61302fbfd19743 Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Thu, 12 Sep 2024 15:04:12 +0000
Subject: [PATCH 12/15] svsm: enable shadow stack

This enables shadow stacks for the BSP.

Signed-off-by: Tom Dohrmann
---
 kernel/src/cpu/shadow_stack.rs | 56 ++++++++++++++++++++++++++++++++++
 kernel/src/svsm.rs             |  5 +++
 2 files changed, 61 insertions(+)

diff --git a/kernel/src/cpu/shadow_stack.rs b/kernel/src/cpu/shadow_stack.rs
index 11b77c607..19fe7014c 100644
--- a/kernel/src/cpu/shadow_stack.rs
+++ b/kernel/src/cpu/shadow_stack.rs
@@ -1,6 +1,62 @@
 // SPDX-License-Identifier: MIT OR Apache-2.0
 
+use bitflags::bitflags;
+
+use super::msr::read_msr;
+
+pub const S_CET: u32 = 0x6a2;
 pub const PL0_SSP: u32 = 0x6a4;
 pub const ISST_ADDR: u32 = 0x6a8;
 
 pub const MODE_64BIT: usize = 1;
+
+/// Enable shadow stacks.
+///
+/// This code is placed in a macro instead of a function so that we don't have
+/// to set up the shadow stack to return from this code.
+#[macro_export]
+macro_rules!
enable_shadow_stacks { + ($bsp_percpu:ident) => {{ + use core::arch::asm; + use core::assert; + use svsm::address::Address; + use svsm::cpu::control_regs::{read_cr4, write_cr4, CR4Flags}; + use svsm::cpu::shadow_stack::{SCetFlags, MODE_64BIT, S_CET}; + + let token_addr = $bsp_percpu.get_top_of_shadow_stack(); + + // Enable CET in CR4. + let mut cr4 = read_cr4(); + assert!(!cr4.contains(CR4Flags::CET), "CET is already enabled"); + cr4 |= CR4Flags::CET; + write_cr4(cr4); + + unsafe { + asm!( + // Enable shadow stacks. + "wrmsr", + // Write a shadow stack restore token onto the stack. + "wrssq [{token_addr}], {token_val}", + // Load the shadow stack. + "rstorssp [{token_addr}]", + in("ecx") S_CET, + in("edx") 0, + in("eax") SCetFlags::SH_STK_EN.bits() | SCetFlags::WR_SHSTK_EN.bits(), + token_addr = in(reg) token_addr.bits(), + token_val = in(reg) token_addr.bits() + 8 + MODE_64BIT, + options(nostack, readonly), + ); + } + }}; +} + +pub fn read_s_cet() -> SCetFlags { + SCetFlags::from_bits_retain(read_msr(S_CET)) +} + +bitflags! { + pub struct SCetFlags: u64 { + const SH_STK_EN = 1 << 0; // Enables the shadow stacks + const WR_SHSTK_EN = 1 << 1; // Enables the WRSS instruction + } +} diff --git a/kernel/src/svsm.rs b/kernel/src/svsm.rs index 25690876a..4abd7880d 100755 --- a/kernel/src/svsm.rs +++ b/kernel/src/svsm.rs @@ -7,6 +7,7 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(not(test), no_main)] +use svsm::enable_shadow_stacks; use svsm::fw_meta::{print_fw_meta, validate_fw_memory, SevFWMetaData}; use bootlib::kernel_launch::KernelLaunchInfo; @@ -347,6 +348,10 @@ pub extern "C" fn svsm_start(li: &KernelLaunchInfo, vb_addr: usize) { .expect("Failed to run percpu.setup_on_cpu()"); bsp_percpu.load(); + if cfg!(feature = "shadow-stacks") { + enable_shadow_stacks!(bsp_percpu); + } + // Idle task must be allocated after PerCPU data is mapped bsp_percpu .setup_idle_task(svsm_main) From 8b6619cb569654d3c1928f49a963b289b4b34f03 Mon Sep 17 00:00:00 2001 From: Tom Dohrmann Date: Thu, 12 Sep 2024 15:05:59 +0000 Subject: [PATCH 13/15] vmsa: enable shadow stacks This enables shadow stacks on the secondary APs. 
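
Concretely, bring-up copies the BSP's CET configuration into each new
AP's VMSA, so the AP starts with shadow stacks already active. A rough
sketch of the two assignments involved (`Vmsa` is a stand-in for the
real VMSA structure):

    struct Vmsa {
        ssp: u64,   // initial shadow stack pointer
        s_cet: u64, // image of the S_CET MSR
    }

    fn mirror_cet_into_vmsa(vmsa: &mut Vmsa, top_of_shadow_stack: u64, s_cet: u64) {
        // Matches `vmsa.ssp = self.get_top_of_shadow_stack().into()` and
        // `vmsa.s_cet = read_s_cet().bits()` in this patch.
        vmsa.ssp = top_of_shadow_stack;
        vmsa.s_cet = s_cet;
    }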
Signed-off-by: Tom Dohrmann --- kernel/src/cpu/percpu.rs | 3 +++ kernel/src/cpu/vmsa.rs | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs index 97c45fef0..8cb082e0a 100644 --- a/kernel/src/cpu/percpu.rs +++ b/kernel/src/cpu/percpu.rs @@ -735,6 +735,9 @@ impl PerCpu { vmsa.tr = self.vmsa_tr_segment(); vmsa.rip = start_rip; vmsa.rsp = self.get_top_of_stack().into(); + if cfg!(feature = "shadow-stacks") { + vmsa.ssp = self.get_top_of_shadow_stack().into(); + } vmsa.cr3 = self.get_pgtable().cr3_value().into(); vmsa.enable(); diff --git a/kernel/src/cpu/vmsa.rs b/kernel/src/cpu/vmsa.rs index 5133c3589..e32391fd8 100644 --- a/kernel/src/cpu/vmsa.rs +++ b/kernel/src/cpu/vmsa.rs @@ -13,6 +13,7 @@ use super::control_regs::{read_cr0, read_cr3, read_cr4}; use super::efer::read_efer; use super::gdt; use super::idt::common::idt; +use super::shadow_stack::read_s_cet; fn svsm_code_segment() -> VMSASegment { VMSASegment { @@ -66,6 +67,9 @@ pub fn init_svsm_vmsa(vmsa: &mut VMSA, vtom: u64) { vmsa.cr3 = read_cr3().bits() as u64; vmsa.cr4 = read_cr4().bits(); vmsa.efer = read_efer().bits(); + if cfg!(feature = "shadow-stacks") { + vmsa.s_cet = read_s_cet().bits(); + } vmsa.rflags = 0x2; vmsa.dr6 = 0xffff0ff0; From a66514a68e1afa5498d0e6bf66f47a84a3489445 Mon Sep 17 00:00:00 2001 From: Tom Dohrmann Date: Thu, 12 Sep 2024 15:06:53 +0000 Subject: [PATCH 14/15] idt: implement #CP handler This exception handler will be executed when the CPU detects a mismatch between the return address on the stack and the return address on the shadow stack. Signed-off-by: Tom Dohrmann --- kernel/src/cpu/idt/entry.S | 4 ++++ kernel/src/cpu/idt/svsm.rs | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S index 8d1681af8..cf03913e7 100644 --- a/kernel/src/cpu/idt/entry.S +++ b/kernel/src/cpu/idt/entry.S @@ -370,7 +370,11 @@ default_entry_no_ist name=xf handler=panic error_code=0 vector=19 // Vector 20 not defined // #CP Control-Protection Exception (Vector 21) +.if CFG_SHADOW_STACKS +default_entry_no_ist name=cp handler=control_protection error_code=1 vector=21 +.else default_entry_no_ist name=cp handler=panic error_code=1 vector=21 +.endif // Vectors 22-27 not defined diff --git a/kernel/src/cpu/idt/svsm.rs b/kernel/src/cpu/idt/svsm.rs index 120b402a5..b50ad8c8d 100644 --- a/kernel/src/cpu/idt/svsm.rs +++ b/kernel/src/cpu/idt/svsm.rs @@ -19,6 +19,7 @@ use crate::address::VirtAddr; use crate::cpu::registers::RFlags; use crate::cpu::X86ExceptionContext; use crate::debug::gdbstub::svsm_gdbstub::handle_debug_exception; +use crate::mm::GuestPtr; use crate::platform::SVSM_PLATFORM; use crate::task::{is_task_fault, terminate}; use core::arch::global_asm; @@ -205,6 +206,51 @@ extern "C" fn ex_handler_page_fault(ctxt: &mut X86ExceptionContext, vector: usiz } } +// Control-Protection handler +#[no_mangle] +extern "C" fn ex_handler_control_protection(ctxt: &mut X86ExceptionContext, _vector: usize) { + // From AMD64 Architecture Programmer's Manual, Volume 2, 8.4.3 + // Control-Protection Error Code: + /// A RET (near) instruction encountered a return address mismatch. + const NEAR_RET: usize = 1; + /// A RET (far) or IRET instruction encountered a return address mismatch. + const FAR_RET_IRET: usize = 2; + /// An RSTORSSP instruction encountered an invalid shadow stack restore + /// token. 
+    const RSTORSSP: usize = 4;
+    /// A SETSSBSY instruction encountered an invalid supervisor shadow stack
+    /// token.
+    const SETSSBSY: usize = 5;
+
+    let rip = ctxt.frame.rip;
+    match ctxt.error_code & 0x7fff {
+        code @ (NEAR_RET | FAR_RET_IRET) => {
+            // Read the return address on the normal stack.
+            let ret_ptr: GuestPtr<u64> = GuestPtr::new(VirtAddr::from(ctxt.frame.rsp));
+            let ret = unsafe { ret_ptr.read() }.expect("Failed to read return address");
+
+            // Read the return address on the shadow stack.
+            let prev_rssp_ptr: GuestPtr<u64> = GuestPtr::new(VirtAddr::from(ctxt.ssp));
+            let prev_rssp = unsafe { prev_rssp_ptr.read() }
+                .expect("Failed to read address of previous shadow stack pointer");
+            // The offset to the return pointer is different for RET and IRET.
+            let offset = if code == NEAR_RET { 0 } else { 8 };
+            let ret_ptr: GuestPtr<u64> = GuestPtr::new(VirtAddr::from(prev_rssp + offset));
+            let ret_on_ssp =
+                unsafe { ret_ptr.read() }.expect("Failed to read return address on shadow stack");
+
+            panic!("thread at {rip:#018x} tried to return to {ret:#x}, but return address on shadow stack was {ret_on_ssp:#x}!");
+        }
+        RSTORSSP => {
+            panic!("rstorssp instruction encountered an unexpected shadow stack restore token at RIP {rip:#018x}");
+        }
+        SETSSBSY => {
+            panic!("setssbsy instruction encountered an unexpected supervisor shadow stack token at RIP {rip:#018x}");
+        }
+        code => unreachable!("unexpected code for #CP exception: {code}"),
+    }
+}
+
 // VMM Communication handler
 #[no_mangle]
 extern "C" fn ex_handler_vmm_communication(ctxt: &mut X86ExceptionContext, vector: usize) {

From 9de01ca9c3ae0de6f75681f478cc02f16590d8aa Mon Sep 17 00:00:00 2001
From: Tom Dohrmann
Date: Wed, 18 Sep 2024 13:38:25 +0000
Subject: [PATCH 15/15] shadow_stack: determine support at runtime

Trusted CPUID values are hard to come by, so let's just try to enable
CET in CR4 and handle failure gracefully.

Signed-off-by: Tom Dohrmann
---
 kernel/src/cpu/idt/common.rs   |  3 +-
 kernel/src/cpu/idt/entry.S     | 13 ++++++++-
 kernel/src/cpu/idt/svsm.rs     |  2 ++
 kernel/src/cpu/percpu.rs       | 12 ++++----
 kernel/src/cpu/shadow_stack.rs | 51 ++++++++++++++++++++++++++++------
 kernel/src/cpu/vmsa.rs         |  4 +--
 kernel/src/svsm.rs             |  4 ++-
 kernel/src/task/schedule.rs    | 10 +++++--
 kernel/src/task/tasks.rs       |  5 ++--
 9 files changed, 81 insertions(+), 23 deletions(-)

diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs
index a97d7e29c..6e46c879c 100644
--- a/kernel/src/cpu/idt/common.rs
+++ b/kernel/src/cpu/idt/common.rs
@@ -11,6 +11,7 @@
 use crate::cpu::control_regs::{read_cr0, read_cr4};
 use crate::cpu::efer::read_efer;
 use crate::cpu::gdt::gdt;
 use crate::cpu::registers::{X86GeneralRegs, X86InterruptFrame};
+use crate::cpu::shadow_stack::is_cet_ss_supported;
 use crate::insn_decode::{InsnError, InsnMachineCtx, InsnMachineMem, Register, SegRegister};
 use crate::locking::{RWLock, ReadLockGuard, WriteLockGuard};
 use crate::mm::GuestPtr;
@@ -72,7 +73,7 @@
     pub fn set_rip(&mut self, new_rip: usize) {
         self.frame.rip = new_rip;
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             // Update the instruction pointer on the shadow stack.
 kernel/src/cpu/idt/common.rs   |  3 +-
 kernel/src/cpu/idt/entry.S     | 13 ++++++++-
 kernel/src/cpu/idt/svsm.rs     |  2 ++
 kernel/src/cpu/percpu.rs       | 12 ++++----
 kernel/src/cpu/shadow_stack.rs | 51 ++++++++++++++++++++++++++++------
 kernel/src/cpu/vmsa.rs         |  4 +--
 kernel/src/svsm.rs             |  4 ++-
 kernel/src/task/schedule.rs    | 10 +++++--
 kernel/src/task/tasks.rs       |  5 ++--
 9 files changed, 81 insertions(+), 23 deletions(-)

diff --git a/kernel/src/cpu/idt/common.rs b/kernel/src/cpu/idt/common.rs
index a97d7e29c..6e46c879c 100644
--- a/kernel/src/cpu/idt/common.rs
+++ b/kernel/src/cpu/idt/common.rs
@@ -11,6 +11,7 @@ use crate::cpu::control_regs::{read_cr0, read_cr4};
 use crate::cpu::efer::read_efer;
 use crate::cpu::gdt::gdt;
 use crate::cpu::registers::{X86GeneralRegs, X86InterruptFrame};
+use crate::cpu::shadow_stack::is_cet_ss_supported;
 use crate::insn_decode::{InsnError, InsnMachineCtx, InsnMachineMem, Register, SegRegister};
 use crate::locking::{RWLock, ReadLockGuard, WriteLockGuard};
 use crate::mm::GuestPtr;
@@ -72,7 +73,7 @@ impl X86ExceptionContext {
     pub fn set_rip(&mut self, new_rip: usize) {
         self.frame.rip = new_rip;
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             // Update the instruction pointer on the shadow stack.
             let return_on_stack = (self.ssp + 8) as *const usize;
             let return_on_stack_val = new_rip;
diff --git a/kernel/src/cpu/idt/entry.S b/kernel/src/cpu/idt/entry.S
index cf03913e7..59a9a4b83 100644
--- a/kernel/src/cpu/idt/entry.S
+++ b/kernel/src/cpu/idt/entry.S
@@ -115,8 +115,11 @@ asm_entry_hv:
         movq %rbx, 0x20(%rsp)
 .if CFG_SHADOW_STACKS
         // Update RIP on the shadow stack to the cancel point.
+        cmpb $0, {IS_CET_SUPPORTED}(%rip)
+        je 2f
         rdsspq %rax
         wrssq %rbx, 8(%rax)
+2:
 .endif
         // Defer any further processing until interrupts can be processed.
         jmp postpone_hv
@@ -158,10 +161,15 @@ hv_not_vmpl_switch:
         // points to the register save area, and continue with #HV
         // processing.
         movq %rcx, %rsp
+.if CFG_SHADOW_STACKS
         // Pop the current stack frame, so that the previous stack frame sits
         // on top of the shadow stack.
+        cmpb $0, {IS_CET_SUPPORTED}(%rip)
+        je 2f
         movl $3, %eax
         incsspq %rax
+2:
+.endif
         jmp handle_as_hv
 
 postpone_hv:
@@ -207,8 +215,11 @@ restart_hv:
 .if CFG_SHADOW_STACKS
         // Pop the current stack frame, so that the previous stack frame sits
         // on top of the shadow stack.
+        cmpb $0, {IS_CET_SUPPORTED}(%rip)
+        je 2f
         movl $3, %eax
-        incsspq %rax
+        incsspq %rax
+2:
 .endif
 
 continue_hv:
diff --git a/kernel/src/cpu/idt/svsm.rs b/kernel/src/cpu/idt/svsm.rs
index b50ad8c8d..cbb3df848 100644
--- a/kernel/src/cpu/idt/svsm.rs
+++ b/kernel/src/cpu/idt/svsm.rs
@@ -17,6 +17,7 @@ use super::common::{
 };
 use crate::address::VirtAddr;
 use crate::cpu::registers::RFlags;
+use crate::cpu::shadow_stack::IS_CET_SUPPORTED;
 use crate::cpu::X86ExceptionContext;
 use crate::debug::gdbstub::svsm_gdbstub::handle_debug_exception;
 use crate::mm::GuestPtr;
@@ -333,5 +334,6 @@ global_asm!(
     include_str!("../x86/smap.S"),
     include_str!("entry.S"),
     IF = const RFlags::IF.bits(),
+    IS_CET_SUPPORTED = sym IS_CET_SUPPORTED,
     options(att_syntax)
 );
diff --git a/kernel/src/cpu/percpu.rs b/kernel/src/cpu/percpu.rs
index 8cb082e0a..5ef4330cc 100644
--- a/kernel/src/cpu/percpu.rs
+++ b/kernel/src/cpu/percpu.rs
@@ -9,7 +9,7 @@ extern crate alloc;
 use super::gdt_mut;
 use super::isst::Isst;
 use super::msr::write_msr;
-use super::shadow_stack::ISST_ADDR;
+use super::shadow_stack::{is_cet_ss_supported, ISST_ADDR};
 use super::tss::{X86Tss, IST_DF};
 use crate::address::{Address, PhysAddr, VirtAddr};
 use crate::cpu::idt::common::INT_INJ_VECTOR;
@@ -639,14 +639,14 @@ impl PerCpu {
         // Allocate per-cpu init stack
         self.allocate_init_stack()?;
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             self.allocate_init_shadow_stack()?;
         }
 
         // Allocate per-cpu context switch stack
         self.allocate_context_switch_stack()?;
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             self.allocate_context_switch_shadow_stack()?;
         }
 
@@ -656,7 +656,7 @@ impl PerCpu {
         // Setup TSS
         self.setup_tss();
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             // Allocate ISST shadow stacks
             self.allocate_isst_shadow_stacks()?;
 
@@ -709,7 +709,7 @@ impl PerCpu {
     pub fn load(&self) {
         self.load_pgtable();
         self.load_tss();
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             self.load_isst();
         }
     }
@@ -735,7 +735,7 @@ impl PerCpu {
         vmsa.tr = self.vmsa_tr_segment();
         vmsa.rip = start_rip;
         vmsa.rsp = self.get_top_of_stack().into();
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             vmsa.ssp = self.get_top_of_shadow_stack().into();
         }
         vmsa.cr3 = self.get_pgtable().cr3_value().into();
diff --git a/kernel/src/cpu/shadow_stack.rs b/kernel/src/cpu/shadow_stack.rs
index 19fe7014c..81d6b497b 100644
--- a/kernel/src/cpu/shadow_stack.rs
+++ b/kernel/src/cpu/shadow_stack.rs
@@ -1,5 +1,10 @@
 // SPDX-License-Identifier: MIT OR Apache-2.0
 
+use core::{
+    arch::asm,
+    sync::atomic::{AtomicBool, Ordering},
+};
+
 use bitflags::bitflags;
 
 use super::msr::read_msr;
@@ -10,6 +15,44 @@ pub const ISST_ADDR: u32 = 0x6a8;
 
 pub const MODE_64BIT: usize = 1;
 
+pub static IS_CET_SUPPORTED: AtomicBool = AtomicBool::new(false);
+
+// Try to enable the CET feature in CR4 and set `IS_CET_SUPPORTED` if successful.
+pub fn determine_cet_support() {
+    // Don't try to determine support if the shadow-stacks feature is not enabled.
+    if !cfg!(feature = "shadow-stacks") {
+        return;
+    }
+
+    let rcx: u64;
+    unsafe {
+        asm!(// Try to enable CET in CR4.
+            "   mov %cr4, %rax",
+            "   or $1<<23, %rax",
+            "1: mov %rax, %cr4",
+            "   xorq %rcx, %rcx",
+            "2:",
+            ".pushsection \"__exception_table\",\"a\"",
+            ".balign 16",
+            ".quad (1b)",
+            ".quad (2b)",
+            ".popsection",
+            out("rax") _,
+            out("rcx") rcx,
+            options(att_syntax, nostack, nomem, pure, preserves_flags));
+    }
+
+    IS_CET_SUPPORTED.store(rcx == 0, Ordering::Relaxed);
+}
+
+/// Returns whether shadow stacks are supported by the CPU and the kernel.
+#[inline(always)]
+pub fn is_cet_ss_supported() -> bool {
+    // In theory CPUs can have support for CET, but not CET_SS, but in practice
+    // no such CPUs exist. Treat CET being supported as CET_SS being supported.
+    cfg!(feature = "shadow-stacks") && IS_CET_SUPPORTED.load(Ordering::Relaxed)
+}
+
 /// Enable shadow stacks.
 ///
 /// This code is placed in a macro instead of a function so that we don't have
@@ -18,19 +61,11 @@ pub const MODE_64BIT: usize = 1;
 macro_rules! enable_shadow_stacks {
     ($bsp_percpu:ident) => {{
         use core::arch::asm;
-        use core::assert;
         use svsm::address::Address;
-        use svsm::cpu::control_regs::{read_cr4, write_cr4, CR4Flags};
         use svsm::cpu::shadow_stack::{SCetFlags, MODE_64BIT, S_CET};
 
         let token_addr = $bsp_percpu.get_top_of_shadow_stack();
 
-        // Enable CET in CR4.
-        let mut cr4 = read_cr4();
-        assert!(!cr4.contains(CR4Flags::CET), "CET is already enabled");
-        cr4 |= CR4Flags::CET;
-        write_cr4(cr4);
-
         unsafe {
             asm!(
                 // Enable shadow stacks.
diff --git a/kernel/src/cpu/vmsa.rs b/kernel/src/cpu/vmsa.rs
index e32391fd8..471b69725 100644
--- a/kernel/src/cpu/vmsa.rs
+++ b/kernel/src/cpu/vmsa.rs
@@ -13,7 +13,7 @@ use super::control_regs::{read_cr0, read_cr3, read_cr4};
 use super::efer::read_efer;
 use super::gdt;
 use super::idt::common::idt;
-use super::shadow_stack::read_s_cet;
+use super::shadow_stack::{is_cet_ss_supported, read_s_cet};
 
 fn svsm_code_segment() -> VMSASegment {
     VMSASegment {
@@ -67,7 +67,7 @@ pub fn init_svsm_vmsa(vmsa: &mut VMSA, vtom: u64) {
     vmsa.cr3 = read_cr3().bits() as u64;
     vmsa.cr4 = read_cr4().bits();
     vmsa.efer = read_efer().bits();
-    if cfg!(feature = "shadow-stacks") {
+    if is_cet_ss_supported() {
         vmsa.s_cet = read_s_cet().bits();
     }
diff --git a/kernel/src/svsm.rs b/kernel/src/svsm.rs
index 4abd7880d..513776e26 100755
--- a/kernel/src/svsm.rs
+++ b/kernel/src/svsm.rs
@@ -7,6 +7,7 @@
 #![cfg_attr(not(test), no_std)]
 #![cfg_attr(not(test), no_main)]
 
+use svsm::cpu::shadow_stack::{determine_cet_support, is_cet_ss_supported};
 use svsm::enable_shadow_stacks;
 use svsm::fw_meta::{print_fw_meta, validate_fw_memory, SevFWMetaData};
 
@@ -312,6 +313,7 @@ pub extern "C" fn svsm_start(li: &KernelLaunchInfo, vb_addr: usize) {
 
     cr0_init();
     cr4_init(platform);
+    determine_cet_support();
     install_console_logger("SVSM").expect("Console logger already initialized");
     platform
         .env_setup(debug_serial_port, launch_info.vtom.try_into().unwrap())
@@ -348,7 +350,7 @@ pub extern "C" fn svsm_start(li: &KernelLaunchInfo, vb_addr: usize) {
         .expect("Failed to run percpu.setup_on_cpu()");
     bsp_percpu.load();
 
-    if cfg!(feature = "shadow-stacks") {
+    if is_cet_ss_supported() {
         enable_shadow_stacks!(bsp_percpu);
     }
 
diff --git a/kernel/src/task/schedule.rs b/kernel/src/task/schedule.rs
index 949142e61..e7b5e64a3 100644
--- a/kernel/src/task/schedule.rs
+++ b/kernel/src/task/schedule.rs
@@ -35,7 +35,7 @@ use super::{Task, TaskListAdapter, TaskPointer, TaskRunListAdapter};
 use crate::address::{Address, VirtAddr};
 use crate::cpu::msr::write_msr;
 use crate::cpu::percpu::{irq_nesting_count, this_cpu};
-use crate::cpu::shadow_stack::PL0_SSP;
+use crate::cpu::shadow_stack::{is_cet_ss_supported, IS_CET_SUPPORTED, PL0_SSP};
 use crate::cpu::sse::sse_restore_context;
 use crate::cpu::sse::sse_save_context;
 use crate::cpu::IrqGuard;
@@ -354,7 +354,7 @@ pub fn schedule() {
     }
 
     this_cpu().set_tss_rsp0(next.stack_bounds.end());
-    if cfg!(feature = "shadow-stacks") {
+    if is_cet_ss_supported() {
         write_msr(PL0_SSP, next.exception_shadow_stack.bits() as u64);
     }
 
@@ -428,6 +428,8 @@ global_asm!(
         mov ${CONTEXT_SWITCH_STACK}, %rsp
 
 .if CFG_SHADOW_STACKS
+        cmpb $0, {IS_CET_SUPPORTED}(%rip)
+        je 1f
         // Save the current shadow stack pointer
         rdssp %rax
         sub $8, %rax
@@ -446,11 +448,14 @@ global_asm!(
         mov %rdx, %cr3
 
 .if CFG_SHADOW_STACKS
+        cmpb $0, {IS_CET_SUPPORTED}(%rip)
+        je 2f
        // Switch to the new task shadow stack and move the "shadow stack
        // restore token" back.
        mov {TASK_SSP_OFFSET}(%rdi), %rdx
        rstorssp (%rdx)
        saveprevssp
+2:
 .endif
 
        // Switch to the new task stack
@@ -481,6 +486,7 @@ global_asm!(
     "#,
     TASK_RSP_OFFSET = const offset_of!(Task, rsp),
     TASK_SSP_OFFSET = const offset_of!(Task, ssp),
+    IS_CET_SUPPORTED = sym IS_CET_SUPPORTED,
     CONTEXT_SWITCH_STACK = const CONTEXT_SWITCH_STACK.as_usize(),
     CONTEXT_SWITCH_RESTORE_TOKEN = const CONTEXT_SWITCH_RESTORE_TOKEN.as_usize(),
     options(att_syntax)
diff --git a/kernel/src/task/tasks.rs b/kernel/src/task/tasks.rs
index 908b02f72..77ff29f5c 100644
--- a/kernel/src/task/tasks.rs
+++ b/kernel/src/task/tasks.rs
@@ -16,6 +16,7 @@ use core::sync::atomic::{AtomicU32, Ordering};
 use crate::address::{Address, VirtAddr};
 use crate::cpu::idt::svsm::return_new_task;
 use crate::cpu::percpu::PerCpu;
+use crate::cpu::shadow_stack::is_cet_ss_supported;
 use crate::cpu::sse::{get_xsave_area_size, sse_restore_context};
 use crate::cpu::X86ExceptionContext;
 use crate::cpu::{irqs_enable, X86GeneralRegs};
@@ -196,7 +197,7 @@ impl Task {
         let mut shadow_stack_offset = VirtAddr::null();
         let mut exception_shadow_stack = VirtAddr::null();
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             let shadow_stack;
             (shadow_stack, shadow_stack_offset) = VMKernelShadowStack::new(
                 SVSM_PERTASK_SHADOW_STACK_BASE,
@@ -270,7 +271,7 @@ impl Task {
         let mut shadow_stack_offset = VirtAddr::null();
         let mut exception_shadow_stack = VirtAddr::null();
 
-        if cfg!(feature = "shadow-stacks") {
+        if is_cet_ss_supported() {
             let shadow_stack;
             (shadow_stack, shadow_stack_offset) = VMKernelShadowStack::new(
                 SVSM_PERTASK_SHADOW_STACK_BASE,