Skip to content

Commit

Permalink
1. I added explicit support for ASAN poisoning, but it was not partic…
Browse files Browse the repository at this point in the history
…ularly valuable in any way, so I:

2. Added a tremendous amount of heap checking.
3. I used this to find and fix:

Hash value caching moved from the con4m object header into the alloc header. This way, any GC'd pointer will have its hash value cached. Not doing it that way was an oversight. And, as a result, occasionally something would be in a dict or set, but the hash value was based on its OLD pointer value, so a collection would give the same value a new hash. Not many things like this are used as keys right now, but one was the module worklist at the top level, so if a collect happened at the wrong time, you could end up in an infinite loop, because some module was never going to get removed from the set (but could always be retrieved from it). To test this out, I lowered the starting heap size all the way down to 1K to try to trigger the problem as much as possible (though the heap doubles in size if, after the previous collect, the remaining space is deemed insufficient). Amazingly, it didn't slow things down.

Also along the way:
- Don't exit the longjmp context before throwing an error....
- Get parsing working on switch statements again; the code gen still needs to be fixed (codegen still not done; coming next).
- Use __builtin_frame_address(0) instead of a local variable.
- Set up initial con4m-specific meson options
  • Loading branch information
viega committed Jul 5, 2024
1 parent b4fdc8e commit 4cac305
Show file tree
Hide file tree
Showing 34 changed files with 791 additions and 220 deletions.
1 change: 1 addition & 0 deletions dev
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ case $1 in
build) meson_build build --buildtype=plain
;;
debug) meson_build debug --buildtype=debug
meson configure debug -Duse_ubsan=true -Duse_asan=true -Duse_memcheck=true
debug_it
;;
release) meson_build release --buildtype=release
Expand Down
2 changes: 1 addition & 1 deletion doc/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ Additionally, type names are all contextual keywords.
The following are punctuation tokens that will eat one trailing newline:

```
+ += - -= -> * *= / /= % %= < <= << <<= > >= >> >>= ! != , . { [ ( & &= | |= ^ ^= = :
+ += - -= -> * *= / /= % %= < <= << <<= > >= >> >>= ! != , . { [ ( & &= | |= ^ ^= =
```

The following punctuation tokens do NOT eat a following newline:
Expand Down
4 changes: 1 addition & 3 deletions include/compiler/datatypes/parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,5 @@ typedef struct {
// call_resolution_t; this one is NOT pre-alloc'd for us.
// - For breaks, continues, returns, it will hold the c4m_loop_info_t
// (the pnode_t not the tree node) that constitutes the jump target.
// -

bool have_value;
bool have_value;
} c4m_pnode_t;
8 changes: 5 additions & 3 deletions include/compiler/parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ c4m_node_get_loc_str(c4m_tree_node_t *n)
static inline c4m_utf8_t *
c4m_node_list_join(c4m_list_t *nodes, c4m_str_t *joiner, bool trailing)
{
int64_t n = c4m_list_len(nodes);
int64_t n = c4m_list_len(nodes);
c4m_list_t *strarr = c4m_new(c4m_type_list(c4m_type_utf8()));

for (int64_t i = 0; i < n; i++) {
Expand Down Expand Up @@ -80,15 +80,14 @@ c4m_node_simp_literal(c4m_tree_node_t *n)
return tok->literal_value;
}


// Context object handed to the parse-tree navigation helpers in this
// header (e.g. c4m_node_down() takes a `c4m_pass1_ctx *`).
//
// `extern_decls` is declared exactly once, after `in_func`; declaring
// the same member twice in one struct is a compile error.
//
// NOTE(review): the per-member remarks below are inferred from the
// member names and types only -- confirm against the pass-1 code.
typedef struct c4m_pass1_ctx {
    c4m_tree_node_t      *cur_tnode;    // presumably the tree node being visited
    c4m_pnode_t          *cur;          // presumably the parse payload of cur_tnode
    c4m_spec_t           *spec;
    c4m_file_compile_ctx *file_ctx;
    c4m_scope_t          *static_scope;
    bool                  in_func;      // presumably set while inside a function body
    c4m_list_t           *extern_decls;
} c4m_pass1_ctx;

static inline c4m_tree_node_t *
Expand Down Expand Up @@ -119,6 +118,9 @@ c4m_node_down(c4m_pass1_ctx *ctx, int i)
return false;
}

if (n->children[i]->parent != n) {
c4m_print_parse_node(n->children[i]);
}
assert(n->children[i]->parent == n);
c4m_set_current_node(ctx, n->children[i]);

Expand Down
11 changes: 8 additions & 3 deletions include/con4m.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#pragma once

// #define C4M_FULL_MEMCHECK
// #define C4M_DEBUG
#define C4M_GC_STATS
// #define C4M_GC_STATS
// #define C4M_TRACE_GC

// #define C4M_GCT_MOVE 1
Expand All @@ -20,13 +21,17 @@

// This won't work on systems that require aligned pointers.
// #define C4M_PARANOID_STACK_SCAN
// #define C4M_PARSE_DEBUG

// UBSan hates our underflow check.
#define C4M_OMIT_UNDERFLOW_CHECKS

// #define C4M_OMIT_UNDERFLOW_CHECKS
#ifdef C4M_NO_DEV_MODE
#undef C4M_DEV
#undef C4M_PARSE_DEBUG
#else
#ifdef C4M_PARSE_DEBUG
#define C4M_DEV
#endif
#define C4M_DEV
#endif

Expand Down
17 changes: 15 additions & 2 deletions include/con4m/box.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,29 @@ c4m_box_obj(c4m_box_t value, c4m_type_t *type)
// However, the allocated item allocated the actual item's size, so we
// have to make sure to get it right on both ends; we can't just
// dereference a uint64_t, for instance.

static inline c4m_box_t
c4m_unbox_obj(c4m_box_t *box)
{
c4m_box_t result = {
.u64 = 0,
};

switch (c4m_get_alloc_len(c4m_get_my_type(box))) {
c4m_type_t *t = c4m_type_unbox(c4m_get_my_type(box));

switch (c4m_get_alloc_len(t)) {
case 1:
result.u8 = box->u8;
// On my mac, when this gets compiled w/ ASAN, ASAN somehow
// mangles the bool even when properly going through the union
// here.
//
// So this shouldn't be necessary, yet here it is.
if (t->details->base_type->typeid == C4M_T_BOOL) {
result.u64 = !!box->u64;
}
else {
result.u8 = box->u8;
}
break;
case 2:
result.u16 = box->u16;
Expand Down
31 changes: 26 additions & 5 deletions include/con4m/datatypes/memory.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#pragma once
#include "con4m.h"

#if defined(C4M_FULL_MEMCHECK) && !defined(C4M_GC_STATS)
#define C4M_GC_STATS
#endif

#define C4M_FORCED_ALIGNMENT 16

typedef void (*c4m_mem_scan_fn)(uint64_t *, int);
Expand Down Expand Up @@ -61,12 +65,13 @@ typedef struct c4m_alloc_hdr {
// and a pointer to a bitfield that contains that many bits. The
// bits that correspond to words with pointers should be set.
c4m_mem_scan_fn scan_fn;

#if defined(C4M_GC_STATS) || defined(C4M_DEBUG)
char *alloc_file;
int alloc_line;
#ifdef C4M_FULL_MEMCHECK
uint64_t *end_guard_loc;
int request_len;
#endif
//
char *alloc_file;
int alloc_line;
__uint128_t cached_hash;
// The actual exposed data. This must be 16-byte aligned!
alignas(C4M_FORCED_ALIGNMENT) uint64_t data[];
} c4m_alloc_hdr;
Expand All @@ -76,6 +81,18 @@ typedef struct c4m_finalizer_info_t {
struct c4m_finalizer_info_t *next;
} c4m_finalizer_info_t;

#ifdef C4M_FULL_MEMCHECK
// Per-allocation bookkeeping record; only compiled in when
// C4M_FULL_MEMCHECK is defined. Records point at each other in both
// directions through `next` and `prev`.
//
// NOTE(review): the roles below are inferred from member names and
// types -- confirm against the allocator implementation:
//   file / line : presumably the source location that requested the
//                 allocation.
//   len         : presumably the requested length.
//   start       : the allocation header this record describes.
//   end         : presumably the location of the allocation's trailing
//                 guard word.
typedef struct c4m_shadow_alloc_t {
struct c4m_shadow_alloc_t *next;
struct c4m_shadow_alloc_t *prev;
char *file;
int line;
int len;
c4m_alloc_hdr *start;
uint64_t *end;
} c4m_shadow_alloc_t;
#endif

typedef struct {
void *ptr;
uint64_t num_items;
Expand All @@ -90,6 +107,10 @@ typedef struct {
#endif

typedef struct c4m_arena_t {
#ifdef C4M_FULL_MEMCHECK
c4m_shadow_alloc_t *shadow_start;
c4m_shadow_alloc_t *shadow_end;
#endif
c4m_alloc_hdr *next_alloc;
hatrack_zarray_t *roots;
c4m_set_t *external_holds;
Expand Down
1 change: 0 additions & 1 deletion include/con4m/datatypes/objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ typedef struct {
// Object header: fixed fields followed by the exposed object data as a
// flexible array member.
//
// NOTE(review): this hunk is reported as "0 additions & 1 deletion" and
// the commit description says hash caching moved into the alloc header,
// so `cached_hash` below is presumably the member being removed here --
// confirm against the post-commit file before relying on it.
struct c4m_base_obj_t {
c4m_dt_info_t *base_data_type;
struct c4m_type_t *concrete_type;
__uint128_t cached_hash;
// The exposed object data.
uint64_t data[];
};
Expand Down
4 changes: 3 additions & 1 deletion include/con4m/datatypes/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
**/

#define C4M_STR_HASH_KEY_POINTER_OFFSET 0
#define C4M_HASH_CACHE_OFFSET (-2 * (int32_t)sizeof(uint64_t))
#define C4M_HASH_CACHE_OBJ_OFFSET (-4 * (int32_t)sizeof(uint64_t))
#define C4M_HASH_CACHE_RAW_OFFSET (-2 * (int32_t)sizeof(uint64_t))

typedef struct c4m_str_t {
char *data;
c4m_style_info_t *styling;
Expand Down
39 changes: 33 additions & 6 deletions include/con4m/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,8 @@

#ifndef C4M_DEFAULT_ARENA_SIZE

#define C4M_DEFAULT_ARENA_SIZE (1 << 26)
// Was previously using 1 << 19
// But this needs to be much bigger than the stack size; 21 is probably
// the minimum value without adjusting the stack.
// This is the size any test case that prints a thing grows to awfully fast.
#define C4M_DEFAULT_ARENA_SIZE (1 << 24)
#endif

// In the future, we would expect that a writer seeing the
Expand Down Expand Up @@ -302,8 +300,6 @@ c4m_round_up_to_given_power_of_2(uint64_t power, uint64_t n)

typedef void (*c4m_gc_hook)();

extern void c4m_get_stack_scan_region(uint64_t *top,
uint64_t *bottom);
extern void c4m_initialize_gc();
extern void c4m_gc_heap_stats(uint64_t *, uint64_t *, uint64_t *);
extern void c4m_gc_add_hold(c4m_obj_t);
Expand All @@ -315,9 +311,40 @@ extern void c4m_internal_lock_then_unstash_heap();
extern void c4m_get_heap_bounds(uint64_t *, uint64_t *, uint64_t *);
extern void c4m_gc_register_collect_fns(c4m_gc_hook, c4m_gc_hook);
extern c4m_alloc_hdr *c4m_find_alloc(void *);
extern bool c4m_in_heap(void *);

// c4m_get_alloc_counter(): a real function when C4M_GC_STATS is
// defined; otherwise a macro that evaluates to the constant 0.
#ifndef C4M_GC_STATS
#define c4m_get_alloc_counter() (0)
#else
uint64_t c4m_get_alloc_counter();
#endif

// Heap-checking entry points. With C4M_FULL_MEMCHECK defined, the
// c4m_memcheck_* macros forward to the underscore-prefixed functions,
// passing along the call site's __FILE__ and __LINE__; without it they
// expand to nothing.
#ifdef C4M_FULL_MEMCHECK
void c4m_alloc_display_front_guard_error(c4m_alloc_hdr *, void *, char *, int, bool);
void c4m_alloc_display_rear_guard_error(c4m_alloc_hdr *, void *, int, void *, char *, int, bool);

void _c4m_memcheck_raw_alloc(void *, char *, int);
void _c4m_memcheck_object(c4m_obj_t, char *, int);

// The argument is parenthesized before the cast so that expression
// arguments (e.g. `p + 1`) are converted as a whole rather than having
// the cast bind to their first operand only.
//
// NOTE(review): the expansions end in `;`, so `if (c) c4m_memcheck_object(o);
// else ...` will not parse in memcheck builds. The semicolon is kept here
// because existing call sites may rely on it -- audit callers before
// removing it.
#define c4m_memcheck_raw_alloc(x) \
    _c4m_memcheck_raw_alloc(((void *)(x)), __FILE__, __LINE__);
#define c4m_memcheck_object(x) \
    _c4m_memcheck_object(((void *)(x)), __FILE__, __LINE__);
#else
#define c4m_memcheck_raw_alloc(x)
#define c4m_memcheck_object(x)
#endif

// AddressSanitizer manual-poisoning hooks.
//
// Detection is split in two so that `__has_feature(...)` is only
// evaluated by preprocessors that actually provide it; on a compiler
// without it, a bare `#if __has_feature(x)` is a preprocessing error.
#if defined(__SANITIZE_ADDRESS__)
#define C4M_ASAN_ENABLED 1
#elif defined(__has_feature)
#if __has_feature(address_sanitizer)
#define C4M_ASAN_ENABLED 1
#endif
#endif

#ifdef C4M_ASAN_ENABLED
// Declared by hand rather than via a sanitizer header.
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);

#define ASAN_POISON_MEMORY_REGION(addr, size) \
    __asan_poison_memory_region((addr), (size))
#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
    __asan_unpoison_memory_region((addr), (size))
#else
// Without ASAN the macros still evaluate (and discard) both arguments,
// so call sites compile identically and produce no unused warnings.
#define ASAN_POISON_MEMORY_REGION(addr, size) \
    ((void)(addr), (void)(size))
#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
    ((void)(addr), (void)(size))
#endif
1 change: 0 additions & 1 deletion include/con4m/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,6 @@ c4m_type_is_box(c4m_type_t *t)
// Return the value held in the `tsi` slot of `t`'s details, viewed as
// a type pointer. `t` is asserted to be a box type.
static inline c4m_type_t *
c4m_type_unbox(c4m_type_t *t)
{
    assert(c4m_type_is_box(t));

    c4m_type_t *inner = (c4m_type_t *)t->details->tsi;

    return inner;
}

Expand Down
50 changes: 32 additions & 18 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,29 @@ if (get_option('buildtype') == 'release')
link_args = link_args + ['-flto']
endif



if (get_option('buildtype') == 'debug')
if (get_option('use_asan') == true)
c_args = c_args + ['-fsanitize=address',
'-fsanitize=undefined',
'-fsanitize-recover=all',
'-fsanitize-recover=all'
]
link_args = link_args + ['-fsanitize=address',
'-fsanitize=undefined',
'-fsanitize-recover=all'
]
endif

if (get_option('use_ubsan') == true)
c_args = c_args + ['-fsanitize=undefined',
'-fsanitize-recover=all'
]
endif

if (get_option('use_memcheck') == true)
c_args = c_args + ['-DC4M_FULL_MEMCHECK']
endif

if (get_option('show_gc_stats') == true)
c_args = c_args + ['-DC4M_GC_STATS']
endif


exe_link_args = link_args + ['-flto', '-w']
exe_c_args = c_args + ['-flto', '-DHATRACK_REFERENCE_ALGORITHMS']
Expand Down Expand Up @@ -222,12 +232,14 @@ libhat = static_library('hatrack',
c_args : c_args,
link_args : link_args)

# library('con4m-dll',
# lib_src,
# include_directories : incdir,
# dependencies : all_deps,
# c_args : c4m_c_args,
# link_args: link_args)
if get_option('build_con4m_dll') == true
library('con4m-dll',
lib_src,
include_directories : incdir,
dependencies : all_deps,
c_args : c4m_c_args,
link_args: link_args)
endif

executable('c4test', test_src,
include_directories : incdir,
Expand All @@ -236,9 +248,11 @@ executable('c4test', test_src,
link_args : exe_link_args,
link_with : libc4m)

# executable('hash', hash_test_src,
# include_directories : incdir,
# dependencies : all_deps,
# link_args : exe_link_args,
# link_with : libhat,
# c_args : exe_c_args)
if get_option('build_hatrack') == true
executable('hash', hash_test_src,
include_directories : incdir,
dependencies : all_deps,
link_args : exe_link_args,
link_with : libhat,
c_args : exe_c_args)
endif
7 changes: 7 additions & 0 deletions meson.options
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
option('use_asan', type: 'boolean', value: false)
option('use_ubsan', type: 'boolean', value: false)
option('build_hatrack', type: 'boolean', value: false)
option('use_memcheck', type: 'boolean', value: false)
option('build_con4m_dll', type: 'boolean', value: false)
# Currently, if this isn't on, there's an issue.
option('show_gc_stats', type: 'boolean', value: true)
Loading

0 comments on commit 4cac305

Please sign in to comment.