From 114837e66853cfed66421164ae56544ef156fcd9 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Wed, 17 Jul 2024 01:58:23 -0700 Subject: [PATCH] [LibOS] Rewire RIP of to-save and to-restore contexts for VM PALs Previously, the LibOS logic had two places that assumed a ring-3 environment and did not account for the need to execute `sysret`/`iretq`: - Preparing sigframes for the application signal handler; the interrupted app context is saved in the sigframe. - Performing the `rt_sigreturn()` syscall after the application signal handler is done; the app context is restored from the previously-saved sigframe. In the case of VM-based PALs, where the LibOS runs in ring-0 and the app runs in ring-3, there are wrappers around syscall entry/exit; see `syscall_asm` and `sysret_asm`/`sigreturn_asm` in the vm-common/kernel_events.S file. These wrappers rewire the context RIP: upon syscall entry, `syscall_asm` saves the app context RIP into a TCB-local variable and sets RIP to the address of the `sysret_asm`/`sigreturn_asm` routine, and upon syscall exit, `sysret_asm`/`sigreturn_asm` restores the app context RIP from the TCB-local variable. This wrapper rewiring was not accounted for in the LibOS logic, meaning that the LibOS would prepare the sigframe with RIP not of the app context but of the `sysret_asm`/`sigreturn_asm` routine. Similarly, the LibOS would restore not the app context's RIP but the RIP of `sysret_asm`/`sigreturn_asm`. This commit adds VM PAL-specific RIP fixups to the LibOS logic. 
Signed-off-by: Dmitrii Kuvaiskii --- libos/src/arch/x86_64/libos_context.c | 22 ++++++++++++++++++++++ libos/src/sys/libos_sigaction.c | 15 +++++++++++++++ pal/include/pal/pal.h | 5 +++++ pal/src/host/tdx/pal_main.c | 3 +++ pal/src/host/vm/pal_main.c | 3 +++ 5 files changed, 48 insertions(+) diff --git a/libos/src/arch/x86_64/libos_context.c b/libos/src/arch/x86_64/libos_context.c index 2db22674f..adb058ffc 100644 --- a/libos/src/arch/x86_64/libos_context.c +++ b/libos/src/arch/x86_64/libos_context.c @@ -244,6 +244,16 @@ void prepare_sigframe(PAL_CONTEXT* context, siginfo_t* siginfo, void* handler, v pal_context_to_ucontext(&sigframe->uc, context); + /* FIXME: make this VM/TDX PAL logic more generic */ + if (!strcmp(g_pal_public_state->host_type, "VM") || + !strcmp(g_pal_public_state->host_type, "TDX")) { + /* Rewire saved sigframe RIP to the where-to-return app code RIP (saved in PAL's TCB) */ + uint64_t user_rip; + void* user_rip_ptr = (char*)pal_get_tcb() + g_pal_public_state->vm_user_rip_offset; + memcpy(&user_rip, user_rip_ptr, sizeof(user_rip)); + ucontext_set_ip(&sigframe->uc, user_rip); + } + /* XXX: Currently we assume that `struct libos_xstate`, `PAL_XREGS_STATE` and `struct _fpstate` * (just the header) are the very same structure. This mess needs to be fixed. */ static_assert(sizeof(struct libos_xstate) == sizeof(PAL_XREGS_STATE), @@ -267,6 +277,18 @@ void prepare_sigframe(PAL_CONTEXT* context, siginfo_t* siginfo, void* handler, v stack -= 8; *(uint64_t*)stack = (uint64_t)restorer; + /* + * FIXME: Here the execution context is rewired to jump to the app's signal handler; afterwards + * the app will pop the frame from the stack and end up calling the restorer (set up on the + * stack above), which will call rt_sigreturn syscall. This rewiring bypasses the ring-0/ring-3 + * wrapper code of VM-based PALs. + * + * However, this works on VM-based PALs (though incorrect isolation-wise). 
That's because even + * though the app signal handler will execute in ring-0, VM PALs have a flat 1:1 + * virtual-to-physical memory mapping, and the app signal handler can access all memory + * normally; and when the signal handler performs syscall(rt_sigreturn) instruction, x86 arch + * allows to call this instruction even from ring-0. + */ context->rip = (uint64_t)handler; context->rsp = stack; /* x64 SysV ABI mandates that DF flag is cleared and states that rest of flags is *not* diff --git a/libos/src/sys/libos_sigaction.c b/libos/src/sys/libos_sigaction.c index a4ceedbf7..43975433f 100644 --- a/libos/src/sys/libos_sigaction.c +++ b/libos/src/sys/libos_sigaction.c @@ -63,11 +63,26 @@ long libos_syscall_rt_sigaction(int signum, const struct __kernel_sigaction* act long libos_syscall_rt_sigreturn(void) { PAL_CONTEXT* context = LIBOS_TCB_GET(context.regs); + void* orig_rip = (void*)pal_context_get_ip(context); __sigset_t new_mask; restore_sigreturn_context(context, &new_mask); clear_illegal_signals(&new_mask); + /* FIXME: make this VM/TDX PAL logic more generic */ + if (!strcmp(g_pal_public_state->host_type, "VM") || + !strcmp(g_pal_public_state->host_type, "TDX")) { + /* + * Rewire context RIP to the VM/TDX PAL sysret trampoline: + * - orig_rip contains the RIP of the sysret trampoline code (see PAL's kernel_events.S) + * - restored rip contains the RIP of the where-to-return app code + */ + void* restored_rip = (void*)pal_context_get_ip(context); + void* user_rip_ptr = (char*)pal_get_tcb() + g_pal_public_state->vm_user_rip_offset; + memcpy(user_rip_ptr, &restored_rip, sizeof(restored_rip)); + pal_context_set_ip(context, (uintptr_t)orig_rip); + } + struct libos_thread* current = get_cur_thread(); lock(¤t->lock); set_sig_mask(current, &new_mask); diff --git a/pal/include/pal/pal.h b/pal/include/pal/pal.h index 259082fb7..6f2c1385b 100644 --- a/pal/include/pal/pal.h +++ b/pal/include/pal/pal.h @@ -156,6 +156,11 @@ struct pal_public_state { `pal_memory.c` for more 
details */ size_t initial_mem_ranges_len; + /* + * App context on syscall + */ + uint64_t vm_user_rip_offset; + /* * Host information */ diff --git a/pal/src/host/tdx/pal_main.c b/pal/src/host/tdx/pal_main.c index 4604a3277..24e286320 100644 --- a/pal/src/host/tdx/pal_main.c +++ b/pal/src/host/tdx/pal_main.c @@ -447,6 +447,9 @@ noreturn int pal_start_continue(void* cmdline_) { g_pal_public_state.attestation_type = "dcap"; + g_pal_public_state.vm_user_rip_offset = offsetof(struct pal_tcb_vm, + kernel_thread.context.user_rip); + ret = pal_common_get_topo_info(&g_pal_public_state.topo_info); if (ret < 0) INIT_FAIL("Failed to get topology information: %s", pal_strerror(ret)); diff --git a/pal/src/host/vm/pal_main.c b/pal/src/host/vm/pal_main.c index 7061199f0..4e1793d06 100644 --- a/pal/src/host/vm/pal_main.c +++ b/pal/src/host/vm/pal_main.c @@ -347,6 +347,9 @@ noreturn int pal_start_continue(void* cmdline_) { g_pal_public_state.attestation_type = "none"; + g_pal_public_state.vm_user_rip_offset = offsetof(struct pal_tcb_vm, + kernel_thread.context.user_rip); + ret = pal_common_get_topo_info(&g_pal_public_state.topo_info); if (ret < 0) INIT_FAIL("Failed to get topology information: %s", pal_strerror(ret));