Skip to content

Commit

Permalink
CONT stack overflow postmortem (esp8266#9083)
Browse files Browse the repository at this point in the history
- check in cont_run() and cont_suspend() whether a1 is out of bounds
- in case a1 is broken, postmortem will still report proper context in
  proper stack boundaries

additionally
- as suggested in esp8266#9069, change stack smashing to a single line that
  does not mention any Exceptions
- reduce overall stack dump length when there are know garbage values i.e. cont stackguard
- decoder.py addr search regexp would no longer skip stack lines with '<'
- fix decoder.py parsing so it notices both stack smashing and alloc errors
  • Loading branch information
mcspr authored and hasenradball committed Nov 18, 2024
1 parent 5d8c994 commit d851344
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 48 deletions.
20 changes: 17 additions & 3 deletions cores/esp8266/cont.S
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,14 @@
cont_suspend:
/* a1: sp */
/* a2: void* cont_ctx */
/* adjust stack and save registers */
/* adjust stack */
addi a1, a1, -24

/* make sure that a1 points after cont_ctx.stack[] */
addi a4, a2, 32
bltu a1, a4, cont_overflow

/* save registers */
s32i a12, a1, 0
s32i a13, a1, 4
s32i a14, a1, 8
Expand All @@ -47,6 +53,11 @@ cont_suspend:
l32i a1, a2, 4
jx a0

cont_overflow:
mov.n a3, a1
movi a4, __stack_overflow
jx a4

cont_continue:
l32i a12, a1, 0
l32i a13, a1, 4
Expand Down Expand Up @@ -113,20 +124,23 @@ cont_run:
bnez a4, cont_resume
/* else */
/* set new stack*/
l32i a1, a2, 16;
l32i a1, a2, 16
/* goto pfn */
movi a2, cont_wrapper
jx a2

cont_resume:
/* a1 <- cont_ctx.sp_suspend */
l32i a1, a2, 12
/* make sure that a1 points after cont_ctx.stack[] */
addi a5, a2, 32
bltu a1, a5, cont_overflow
/* reset yield flag, 0 -> cont_ctx.pc_suspend */
movi a3, 0
s32i a3, a2, 8
/* jump to saved cont_ctx.pc_suspend */
movi a0, cont_ret
jx a4
jx a4

cont_norm:
/* calculate pointer to cont_ctx.struct_start from sp */
Expand Down
11 changes: 9 additions & 2 deletions cores/esp8266/cont.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@
#define CONT_H_

#include <stdbool.h>
#include <stdint.h>

#ifndef CONT_STACKSIZE
#define CONT_STACKSIZE 4096
#endif

#ifndef CONT_STACKGUARD
#define CONT_STACKGUARD 0xfeefeffe
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -62,8 +67,11 @@ void cont_run(cont_t*, void (*pfn)(void));
// execution state (registers and stack)
void cont_suspend(cont_t*);

// Check that cont resume state is valid. Immediately panics on failure.
void cont_check_overflow(cont_t*);

// Check guard bytes around the stack. Immediately panics on failure.
void cont_check(cont_t*);
void cont_check_guard(cont_t*);

// Go through stack and check how many bytes are most probably still unchanged
// and thus weren't used by the user code. i.e. that stack space is free. (high water mark)
Expand All @@ -78,7 +86,6 @@ bool cont_can_suspend(cont_t* cont);
// free, running the routine, then checking the max free
void cont_repaint_stack(cont_t *cont);


#ifdef __cplusplus
}
#endif
Expand Down
27 changes: 17 additions & 10 deletions cores/esp8266/cont_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,45 @@
#include <stddef.h>
#include <string.h>

#include "cont.h"
#include "core_esp8266_features.h"
#include "debug.h"

#include "cont.h"

extern "C"
{

static constexpr unsigned int CONT_STACKGUARD { 0xfeefeffe };
static constexpr uint32_t CONT_STACKSIZE_U32 { sizeof(cont_t::stack) / sizeof(*cont_t::stack) };

void cont_init(cont_t* cont) {
memset(cont, 0, sizeof(cont_t));

cont->stack_guard1 = CONT_STACKGUARD;
cont->stack_guard2 = CONT_STACKGUARD;
cont->stack_end = cont->stack + (sizeof(cont->stack) / 4);
cont->stack_end = &cont->stack[0] + CONT_STACKSIZE_U32;
cont->struct_start = (unsigned*) cont;

// fill stack with magic values to check high water mark
for(int pos = 0; pos < (int)(sizeof(cont->stack) / 4); pos++)
for(int pos = 0; pos < (int)(CONT_STACKSIZE_U32); pos++)
{
cont->stack[pos] = CONT_STACKGUARD;
}
}

void IRAM_ATTR cont_check(cont_t* cont) {
if ((cont->stack_guard1 == CONT_STACKGUARD)
&& (cont->stack_guard2 == CONT_STACKGUARD))
void IRAM_ATTR cont_check_guard(cont_t* cont) {
if ((cont->stack_guard1 != CONT_STACKGUARD)
|| (cont->stack_guard2 != CONT_STACKGUARD))
{
return;
__stack_chk_fail();
__builtin_unreachable();
}
}

__stack_chk_fail();
__builtin_unreachable();
void IRAM_ATTR cont_check_overflow(cont_t* cont) {
if (cont->sp_suspend && (cont->sp_suspend < &cont->stack[0])) {
__stack_overflow(cont, cont->sp_suspend);
__builtin_unreachable();
}
}

// No need for this to be in IRAM, not expected to be IRQ called
Expand Down
2 changes: 1 addition & 1 deletion cores/esp8266/core_esp8266_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ static void loop_wrapper() {
}
loop();
loop_end();
cont_check(g_pcont);
cont_check_guard(g_pcont);
if (serialEventRun) {
serialEventRun();
}
Expand Down
64 changes: 49 additions & 15 deletions cores/esp8266/core_esp8266_postmortem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ static const char* s_unhandled_exception = NULL;

// Common way to notify about where the stack smashing happened
// (but, **only** if caller uses our handler function)
static uint32_t s_stacksmash_addr = 0;
static uint32_t s_stack_chk_addr = 0;

void abort() __attribute__((noreturn));
static void uart_write_char_d(char c);
Expand All @@ -59,6 +59,7 @@ static void print_stack(uint32_t start, uint32_t end);
// using numbers different from "REASON_" in user_interface.h (=0..6)
enum rst_reason_sw
{
REASON_USER_STACK_OVERFLOW = 252,
REASON_USER_STACK_SMASH = 253,
REASON_USER_SWEXCEPTION_RST = 254
};
Expand Down Expand Up @@ -188,7 +189,7 @@ static void postmortem_report(uint32_t sp_dump) {
}
else if (rst_info.reason == REASON_SOFT_WDT_RST) {
ets_printf_P(PSTR("\nSoft WDT reset"));
const char infinite_loop[] = { 0x06, 0xff, 0xff }; // loop: j loop
const uint8_t infinite_loop[] = { 0x06, 0xff, 0xff }; // loop: j loop
if (is_pc_valid(rst_info.epc1) && 0 == memcmp_P(infinite_loop, (PGM_VOID_P)rst_info.epc1, 3u)) {
// The SDK is riddled with these. They are usually preceded by an ets_printf.
ets_printf_P(PSTR(" - deliberate infinite loop detected"));
Expand All @@ -198,17 +199,23 @@ static void postmortem_report(uint32_t sp_dump) {
rst_info.exccause, /* Address executing at time of Soft WDT level-1 interrupt */ rst_info.epc1, 0, 0, 0, 0);
}
else if (rst_info.reason == REASON_USER_STACK_SMASH) {
ets_printf_P(PSTR("\nStack smashing detected.\n"));
ets_printf_P(PSTR("\nException (%d):\nepc1=0x%08x epc2=0x%08x epc3=0x%08x excvaddr=0x%08x depc=0x%08x\n"),
5 /* Alloca exception, closest thing to stack fault*/, s_stacksmash_addr, 0, 0, 0, 0);
}
ets_printf_P(PSTR("\nStack smashing detected at 0x%08x\n"), s_stack_chk_addr);
}
else if (rst_info.reason == REASON_USER_STACK_OVERFLOW) {
ets_printf_P(PSTR("\nStack overflow detected\n"));
}
else {
ets_printf_P(PSTR("\nGeneric Reset\n"));
}

uint32_t cont_stack_start = (uint32_t) &(g_pcont->stack);
uint32_t cont_stack_end = (uint32_t) g_pcont->stack_end;
uint32_t stack_end;
uint32_t cont_stack_start;
if (rst_info.reason == REASON_USER_STACK_SMASH) {
cont_stack_start = s_stack_chk_addr;
} else {
cont_stack_start = (uint32_t) (&g_pcont->stack[0]);
}

uint32_t cont_stack_end = cont_stack_start + CONT_STACKSIZE;

// amount of stack taken by interrupt or exception handler
// and everything up to __wrap_system_restart_local
Expand Down Expand Up @@ -249,15 +256,21 @@ static void postmortem_report(uint32_t sp_dump) {
sp_dump = stack_thunk_get_cont_sp();
}

if (sp_dump > cont_stack_start && sp_dump < cont_stack_end) {
uint32_t stack_end;

// above and inside of cont, dump from the sp to the bottom of the stack
if ((rst_info.reason == REASON_USER_STACK_OVERFLOW)
|| ((sp_dump > cont_stack_start) && (sp_dump < cont_stack_end)))
{
ets_printf_P(PSTR("\nctx: cont\n"));
stack_end = cont_stack_end;
}
// in system, reposition to a known address
// it's actually 0x3ffffff0, but the stuff below ets_run
// is likely not really relevant to the crash
else {
ets_printf_P(PSTR("\nctx: sys\n"));
stack_end = 0x3fffffb0;
// it's actually 0x3ffffff0, but the stuff below ets_run
// is likely not really relevant to the crash
}

ets_printf_P(PSTR("sp: %08x end: %08x offset: %04x\n"), sp_dump, stack_end, offset);
Expand Down Expand Up @@ -296,11 +309,20 @@ static void print_stack(uint32_t start, uint32_t end) {
for (uint32_t pos = start; pos < end; pos += 0x10) {
uint32_t* values = (uint32_t*)(pos);

// avoid printing irrelevant data
if ((values[0] == CONT_STACKGUARD)
&& (values[0] == values[1])
&& (values[1] == values[2])
&& (values[2] == values[3]))
{
continue;
}

// rough indicator: stack frames usually have SP saved as the second word
bool looksLikeStackFrame = (values[2] == pos + 0x10);
const bool looksLikeStackFrame = (values[2] == pos + 0x10);

ets_printf_P(PSTR("%08x: %08x %08x %08x %08x %c\n"),
pos, values[0], values[1], values[2], values[3], (looksLikeStackFrame)?'<':' ');
pos, values[0], values[1], values[2], values[3], (looksLikeStackFrame) ? '<' : ' ');
}
}

Expand Down Expand Up @@ -370,7 +392,7 @@ void __panic_func(const char* file, int line, const char* func) {
uintptr_t __stack_chk_guard = 0x08675309 ^ RANDOM_REG32;
void __stack_chk_fail(void) {
s_user_reset_reason = REASON_USER_STACK_SMASH;
s_stacksmash_addr = (uint32_t)__builtin_return_address(0);
s_stack_chk_addr = (uint32_t)__builtin_return_address(0);

if (gdb_present())
__asm__ __volatile__ ("syscall"); // triggers GDB when enabled
Expand All @@ -382,4 +404,16 @@ void __stack_chk_fail(void) {
__builtin_unreachable(); // never reached, needed to satisfy "noreturn" attribute
}

void __stack_overflow(cont_t* cont, uint32_t* sp) {
s_user_reset_reason = REASON_USER_STACK_OVERFLOW;
s_stack_chk_addr = (uint32_t)&cont->stack[0];

if (gdb_present())
__asm__ __volatile__ ("syscall"); // triggers GDB when enabled

postmortem_report((uint32_t)sp);

__builtin_unreachable(); // never reached, needed to satisfy "noreturn" attribute
}

} // extern "C"
3 changes: 3 additions & 0 deletions cores/esp8266/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <stddef.h>
#include <stdint.h>

#include "cont.h"

#define _DEBUG_LEAF_FUNCTION(...) __asm__ __volatile__("" ::: "a0", "memory")

#ifdef DEBUG_ESP_CORE
Expand Down Expand Up @@ -32,6 +34,7 @@ extern "C"
{
#endif
void __stack_chk_fail(void) __attribute__((noreturn));
void __stack_overflow(cont_t*, uint32_t*) __attribute__((noreturn));
void __unhandled_exception(const char* str) __attribute__((noreturn));
void __panic_func(const char* file, int line, const char* func) __attribute__((noreturn));
#define panic() __panic_func(PSTR(__FILE__), __LINE__, __func__)
Expand Down
33 changes: 16 additions & 17 deletions tools/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"permit stores",
)


# similar to java version, which used `list` and re-formatted it
# instead, simply use an already short-format `info line`
# TODO `info symbol`? revert to `list`?
Expand Down Expand Up @@ -96,12 +97,12 @@ def addresses_addr2line(addr2line, elf, addresses):


def decode_lines(format_addresses, elf, lines):
STACK_RE = re.compile(r"^[0-9a-f]{8}:\s+([0-9a-f]{8} ?)+ *$")
ANY_ADDR_RE = re.compile(r"0x[0-9a-fA-F]{8}|[0-9a-fA-F]{8}")
HEX_ADDR_RE = re.compile(r"0x[0-9a-f]{8}")

LAST_ALLOC_RE = re.compile(
r"last failed alloc call: ([0-9a-fA-F]{8})\(([0-9]+)\).*"
)
LAST_ALLOC = "last failed alloc"
MEM_ERR_LINE_RE = re.compile(r"^(Stack|last failed alloc call)")

STACK_LINE_RE = re.compile(r"^[0-9a-f]{8}:\s\s+")

CUT_HERE_STRING = "CUT HERE FOR EXCEPTION DECODER"
EXCEPTION_STRING = "Exception ("
Expand Down Expand Up @@ -131,13 +132,11 @@ def format_address(address):
stack_addresses = print_all_addresses(stack_addresses)
last_stack = line.strip()
# 3fffffb0: feefeffe feefeffe 3ffe85d8 401004ed
elif in_stack and STACK_RE.match(line):
stack, addrs = line.split(":")
addrs = addrs.strip()
addrs = addrs.split(" ")
elif in_stack and STACK_LINE_RE.match(line):
_, addrs = line.split(":")
addrs = ANY_ADDR_RE.findall(addrs)
stack_addresses.setdefault(last_stack, [])
for addr in addrs:
stack_addresses[last_stack].append(addr)
stack_addresses[last_stack].extend(addrs)
# epc1=0xfffefefe epc2=0xfefefefe epc3=0xefefefef excvaddr=0xfefefefe depc=0xfefefefe
elif EPC_STRING in line:
pairs = line.split()
Expand All @@ -152,13 +151,13 @@ def format_address(address):
elif EXCEPTION_STRING in line:
number = line.strip()[len(EXCEPTION_STRING) : -2]
print(f"Exception ({number}) - {EXCEPTION_CODES[int(number)]}")
# stack smashing detected at <ADDR>
# last failed alloc call: <ADDR>(<NUMBER>)[@<maybe file loc>]
elif LAST_ALLOC in line:
values = LAST_ALLOC_RE.match(line)
if values:
addr, size = values.groups()
print()
print(f"Allocation of {size} bytes failed: {format_address(addr)}")
elif MEM_ERR_LINE_RE.match(line):
for addr in ANY_ADDR_RE.findall(line):
line = line.replace(addr, format_address(addr))
print()
print(line.strip())
# postmortem guards our actual stack dump values with these
elif ">>>stack>>>" in line:
in_stack = True
Expand Down

0 comments on commit d851344

Please sign in to comment.