Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

c18n: [Draft] Save caller's stack pointer in trusted frame #2060

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 37 additions & 30 deletions libexec/rtld-elf/aarch64/rtld_c18n_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
*/

#include <machine/asm.h>
#define IN_ASM
#include "rtld_c18n_machdep.h"
#undef IN_ASM

ENTRY(_rtld_setjmp)
#ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
Expand Down Expand Up @@ -345,7 +348,7 @@ TRAMP(tramp_save_caller)
*/
gclim x11, c10
scvalue c18, c10, x11
ldr c17, [c18, #-CAP_WIDTH]
ldr x17, [c18, #-CAP_WIDTH]
str c10, [c18, #-CAP_WIDTH]

#ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
Expand All @@ -364,14 +367,15 @@ TRAMP(tramp_save_caller)
2: add x13, x13, #0 /* To be patched at runtime */

/* Push frame */
stp c29, c30, [TRUSTED_STACK, #-(CAP_WIDTH * 14)]!
stp c29, c30, [TRUSTED_STACK, #-(CAP_WIDTH * C18N_TRUSTED_FRAME_SIZE)]!
stp x12, x13, [TRUSTED_STACK, #(CAP_WIDTH * 2)]
stp c17, c19, [TRUSTED_STACK, #(CAP_WIDTH * 3)]
stp c20, c21, [TRUSTED_STACK, #(CAP_WIDTH * 5)]
stp c22, c23, [TRUSTED_STACK, #(CAP_WIDTH * 7)]
stp c24, c25, [TRUSTED_STACK, #(CAP_WIDTH * 9)]
stp c26, c27, [TRUSTED_STACK, #(CAP_WIDTH * 11)]
str c28, [TRUSTED_STACK, #(CAP_WIDTH * 13)]
str c10, [TRUSTED_STACK, #(CAP_WIDTH * 3)]
stp x17, x12, [TRUSTED_STACK, #(CAP_WIDTH * 4)]
stp c19, c20, [TRUSTED_STACK, #(CAP_WIDTH * 5)]
stp c21, c22, [TRUSTED_STACK, #(CAP_WIDTH * 7)]
stp c23, c24, [TRUSTED_STACK, #(CAP_WIDTH * 9)]
stp c25, c26, [TRUSTED_STACK, #(CAP_WIDTH * 11)]
stp c27, c28, [TRUSTED_STACK, #(CAP_WIDTH * 13)]
#ifdef __ARM_MORELLO_PURECAP_BENCHMARK_ABI
msr rcsp_el0, TRUSTED_STACK
#endif
Expand Down Expand Up @@ -435,12 +439,12 @@ TRAMP(tramp_switch_stack)
/*
* If the stack table index is out-of-bounds, set it to zero.
*/
csel w17, w14, wzr, hi
csel w26, w14, wzr, hi
/*
* Load the callee's stack if the stack table index is within bounds.
* Otherwise the resolver will be loaded.
*/
ldr c20, [c30, w17, uxtw #4]
ldr c20, [c30, w26, uxtw #4]
/*
* If the resolver has been loaded, set the branch target to it.
*/
Expand Down Expand Up @@ -529,8 +533,8 @@ TRAMP(tramp_invoke_res)
mov x23, xzr
mov x24, xzr
mov x25, xzr
mov x26, xzr
/*
* - c26: Stack table index (scalar)
* - c27: Test result (scalar)
* - c28: Permission bits (scalar)
* - c29: Frame pointer (scalar)
Expand All @@ -539,13 +543,13 @@ TRAMP(tramp_invoke_res)
/*
* Clear temporary registers, except
* - c10: Callee's stack
* - c11: Top of caller's stack (scalar)
* - c12: Link to previous frame (scalar)
* - c13: Number of unused return argument registers (scalar)
* - c11: Limit of caller's stack (scalar)
* - c12: Old trusted stack (scalar)
* - c13: Cookie and number of unused return argument registers (scalar)
* - c14: Callee's compartment ID (scalar)
* - c15: Length of stack table (scalar)
* - c16: Comparison result (scalar)
* - c17: Stack table index (scalar)
* - c17: Old bottom of caller's stack (scalar)
* - c18: CHERI_PERM_EXECUTE (scalar)
*/

Expand All @@ -563,20 +567,20 @@ TRAMP(tramp_pop_frame)
/* Restore callee-saved registers */
ldp c29, c30, [TRUSTED_STACK]
ldp x10, x11, [TRUSTED_STACK, #(CAP_WIDTH * 2)]
ldp c12, c19, [TRUSTED_STACK, #(CAP_WIDTH * 3)]
ldp c20, c21, [TRUSTED_STACK, #(CAP_WIDTH * 5)]
ldp c22, c23, [TRUSTED_STACK, #(CAP_WIDTH * 7)]
ldp c24, c25, [TRUSTED_STACK, #(CAP_WIDTH * 9)]
ldp c26, c27, [TRUSTED_STACK, #(CAP_WIDTH * 11)]
ldr c28, [TRUSTED_STACK, #(CAP_WIDTH * 13)]
ldp c15, c12, [TRUSTED_STACK, #(CAP_WIDTH * 3)]
ldp c19, c20, [TRUSTED_STACK, #(CAP_WIDTH * 5)]
ldp c21, c22, [TRUSTED_STACK, #(CAP_WIDTH * 7)]
ldp c23, c24, [TRUSTED_STACK, #(CAP_WIDTH * 9)]
ldp c25, c26, [TRUSTED_STACK, #(CAP_WIDTH * 11)]
ldp c27, c28, [TRUSTED_STACK, #(CAP_WIDTH * 13)]

/*
* Restore caller's saved rcsp.
*/
gclim x13, c12
scvalue c14, c12, x13
ldr c15, [c14, #-CAP_WIDTH]
str c12, [c14, #-CAP_WIDTH]
gclim x13, c15
scvalue c14, c15, x13
scvalue c2, c15, x12
str c2, [c14, #-CAP_WIDTH]

/*
* Clear unused return value registers. The registers to clear is
Expand All @@ -598,7 +602,10 @@ TRAMP(tramp_pop_frame)
msr rcsp_el0, TRUSTED_STACK
#endif

mov x2, xzr
/*
* Clear temporary registers, except
* - c2: Old bottom of caller's stack
*/
mov x3, xzr
mov x4, xzr
mov x5, xzr
Expand All @@ -610,10 +617,10 @@ TRAMP(tramp_pop_frame)
/*
* Clear temporary registers, except
* - c10: Link to previous frame (scalar)
* - c11: Number of unused return argument registers (scalar)
* - c12: Old top of caller's stack
* - c13: Bottom of caller's stack (scalar)
* - c14: Bottom of caller's stack
* - c11: Cookie and number of unused return argument registers (scalar)
* - c12: Old bottom of caller's stack (scalar)
* - c13: Limit of caller's stack (scalar)
* - c14: Limit of caller's stack
* - c15: Current top of caller's stack
* - c16: Logical operation result (scalar)
* - c17: Comparison result (scalar)
Expand Down
34 changes: 32 additions & 2 deletions libexec/rtld-elf/aarch64/rtld_c18n_machdep.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,50 @@
#ifndef RTLD_C18N_MACHDEP_H
#define RTLD_C18N_MACHDEP_H

#define C18N_TRUSTED_FRAME_SIZE 15

#ifndef IN_ASM
/*
* Stack unwinding
*/
struct trusted_frame {
void *fp;
void *pc;
/*
* Address of the next trusted frame
*/
ptraddr_t next;
/*
* Number of return value registers, encoded in enum tramp_ret_args
*/
uint8_t ret_args : 2;
/*
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, is libunwind using this? GDB doesn't use the cookie, it matches on the instruction sequence just like for signal frames. That is, I don't understand how cookie is different from pc?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

libunwind uses this to detect if the current return address is a trampoline and we need to unwind from the trusted frame.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pc is what the trampoline should return to. cookie is where the callee should return to. It is an address within the trampoline.

Checking the Executive bit of the return address to determine whether we are at a compartment boundary is unreliable because we might be returning into RTLD directly. Checking that the return address matches the cookie gives full assurance, and it also works under the benchmark ABI where everything is Executive.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess what I don't understand is that since the cookie is already on the trusted stack, doesn't libunwind have to know before it can even see the cookie that it needs to read from the trusted stack (ECSP) instead of the normal stack (RCSP)? That is, when libunwind is unwinding from the uppermost frame of a compartment back out to the compartment switch, it gets a CLR (and thus PCC) value that points into a trampoline. How does it then decide that this new frame is a compartment frame at a compartment boundary, thus requiring it to go looking in ECSP instead of RCSP for its saved frame? If this were DWARF based, you would look up the FDE/CFI based on the unwound PC value and it would tell you the register for CFA and the offsets of saved registers relative to the CFA. For custom unwinders in GDB including signal frames, GDB does a pattern match against known instructions (e.g. the first 3 instructions CLR points to) to locate a custom unwinder. Does CFP in this case store a scalar address back into the trusted stack (yes, that appears to be true) that libunwind notices is out of the bounds of RCSP and thus assumes it might be on the trusted stack? If you aren't doing the bounds check, how do you avoid potential false positives if the stack garbage at the offset of cookie in a "normal" frame happens to match PC? If you are doing the bounds check, isn't that alone sufficient to know you are on an alternate stack? (And you could even check if it is in bounds of the trusted stack which libunwind presumably has access to.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> doesn't libunwind have to know before it can even see the cookie that it needs to read from the trusted stack (ECSP) instead of the normal stack (RCSP)?

libunwind always just blindly checks whether the CLR matches the cookie of the top trusted frame to decide if it's at a compartment boundary. (See https://github.com/CTSRD-CHERI/llvm-project/pull/731/files#diff-6dd9111398b6f61443d5eec566d751eb857a8ee262fe9c5f38c192881826300dR363 which is being used at https://github.com/CTSRD-CHERI/llvm-project/pull/731/files#diff-6dd9111398b6f61443d5eec566d751eb857a8ee262fe9c5f38c192881826300dR602)

The cookie feature was implemented before I started to properly set the CFP value, and your suggestion of checking if CFP is in bounds does seem like a viable idea. @dstolfa What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like it could work, I'll prototype it to check.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I'm after is this: if we no longer need the cookie value, we can remove it from the trusted_frame and, with a bit of reorganizing, have room for the full ecsp in place of the current ptraddr_t for csp.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular, it is pretty unusual to restore "part" of a register on a stack frame in an unwinder. DWARF CFI annotations don't (afaik) have a way to describe this kind of case currently for example as stack frames in general save/restore entire registers. It's true that we can't use DWARF to describe this frame today for multiple reasons (dynamically allocated trampolines; no DWARF register number for RCSP and ECSP, just CSP; etc.), but I do think we should aim to create a frame that is generally compatible with how other stack frames generally work as we are less likely to run afoul of assumptions in other unwinders.

* This field contains the code address in the trampoline that the
* callee should return to. This is only used by unwinders to detect
* compartment boundaries.
*/
ptraddr_t cookie : 62;
/*
* INVARIANT: This field contains the top of the caller's stack when the
* caller made the call.
*/
void *n_sp;
dpgao marked this conversation as resolved.
Show resolved Hide resolved
/*
* INVARIANT: This field contains the top of the caller's stack when the
* caller was last entered.
*/
void *o_sp;
ptraddr_t o_sp;
/*
* Only used by unwinders
dpgao marked this conversation as resolved.
Show resolved Hide resolved
*/
ptraddr_t csp;
/*
* c19 to c28
*/
void *regs[10];
};

_Static_assert(
sizeof(struct trusted_frame) == sizeof(uintptr_t) * C18N_TRUSTED_FRAME_SIZE,
"Unexpected struct trusted_frame size");
#endif
#endif
8 changes: 4 additions & 4 deletions libexec/rtld-elf/rtld_c18n.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,9 +567,9 @@ _rtld_longjmp_impl(uintptr_t ret, void **buf, struct trusted_frame *csp,
* Unwind each frame before the target frame.
*/
do {
stk = cheri_setoffset(cur->o_sp, cheri_getlen(cur->o_sp));
stk = cheri_setoffset(cur->n_sp, cheri_getlen(cur->n_sp));
--stk;
stk->top = cur->o_sp;
stk->top = cheri_setaddress(cur->n_sp, cur->o_sp);
cur = cheri_setaddress(cur, cur->next);
} while (cur < target);

Expand All @@ -587,8 +587,8 @@ _rtld_longjmp_impl(uintptr_t ret, void **buf, struct trusted_frame *csp,
*/
stk = cheri_setoffset(rcsp, cheri_getlen(rcsp));
--stk;
csp->o_sp = stk->top;
stk->top = rcsp;
csp->n_sp = rcsp;
csp->o_sp = (ptraddr_t)stk->top;

return ((struct jmp_args) { .ret = ret });
}
Expand Down
Loading