-
Notifications
You must be signed in to change notification settings - Fork 1
/
object.c
513 lines (473 loc) · 15.3 KB
/
object.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "tinylisp.h"
static int _tl_refill_freelist(tl_interp *in, size_t objects) {
/* XXX overflow hazard but we don't want memset here */
tl_object *list = tl_alloc_malloc(in, objects * sizeof(tl_object));
size_t index;
#ifdef GC_DEBUG
tl_printf(in, "refill: 0x%zx objects at %p\n", objects, list);
#endif
if(!list) return 0;
for(index = 0; index < objects; index++) {
list[index].next_alloc = in->free_alloc;
list[index].prev_alloc = NULL;
if(in->free_alloc) in->free_alloc->prev_alloc = list + index;
in->free_alloc = list + index;
}
return 1;
}
/** Create a new object.
*
* The object has undefined type, which must be initialized. The more specific
* constructors already do this.
*
* The returned object is threaded into the garbage collector, and will be
* collected if it is not reachable from any root on the next call to `tl_gc`.
*/
tl_object *tl_new(tl_interp *in) {
tl_object *obj = NULL;
tl_trace(new_enter, in);
/* Is the freelist empty? */
if(!in->free_alloc) {
if(!_tl_refill_freelist(in, in->oballoc_batch)) {
/* If we're here, we can't allocate a full batch. Try just one
* object instead; we'll take it from the freelist momentarily. */
if(!_tl_refill_freelist(in, 1)) {
/* Still no luck? Try compacting our free memory; this is very expensive. */
tl_gc(in);
tl_reclaim(in);
if(!_tl_refill_freelist(in, 1)) {
/* We are well and truly out of memory; no sense in proceeding. */
abort();
}
}
}
}
/* Unthread from freelist */
obj = in->free_alloc;
in->free_alloc = obj->next_alloc;
if(in->free_alloc) in->free_alloc->prev_alloc = NULL;
/* Thread into alloc list */
obj->next_alloc = in->top_alloc;
obj->prev_alloc = NULL;
if(in->top_alloc) in->top_alloc->prev_alloc = obj;
in->top_alloc = obj;
tl_trace(new_exit, in, obj);
return obj;
}
/** Create a new integer object.
*
* Note that TL does not cache integers; every integer object is a uniqe
* pointer. If you find yourself frequently using the same value, you may wish
* to implement your own cache.
*/
tl_object *tl_new_int(tl_interp *in, long ival) {
tl_object *obj = tl_new(in);
obj->kind = TL_INT;
obj->ival = ival;
return obj;
}
/** Create a new symbol object.
*
* This assumes that `str` is a C string (terminated by a NULL byte). Use
* `tl_new_sym_data` for more general-purpose data.
*/
tl_object *tl_new_sym(tl_interp *in, const char *str) {
if(str) {
return tl_new_sym_data(in, str, strlen(str));
} else {
return tl_new_sym_data(in, NULL, 0);
}
}
/** Creates a new symbol object with length.
*
* TL assumes that the data in this symbol is immutable, especially for
* comparisons. Applications shouldn't mutate it, but are free to make as many
* symbols as they would like.
*/
tl_object *tl_new_sym_data(tl_interp *in, const char *data, size_t len) {
tl_buffer buf = {(char *) data, len}; /* Discard constness--it won't mutate anyway */
return tl_new_sym_name(in, tl_ns_resolve(in, &in->ns, buf));
}
/** Creates a new symbol object by name.
*
* It's assumed that the name is unique to this symbol and was correctly
* initialized, generally by resolving against the interpreter's own namespace.
* This isn't enforced, however, and this mechanism could theoretically be used
* to introduce entirely hygienic symbols.
*/
tl_object *tl_new_sym_name(tl_interp *in, tl_name *name) {
tl_object *obj = tl_new(in);
obj->kind = TL_SYM;
obj->nm = name;
return obj;
}
/** Creates a new pair.
*
* This is the underlying constructor for a "cons" cell.
*/
tl_object *tl_new_pair(tl_interp *in, tl_object *first, tl_object *next) {
tl_object *obj = tl_new(in);
obj->kind = TL_PAIR;
obj->first = first;
obj->next = next;
return obj;
}
/** Creates a new continuation ("then").
*
* These are the constituents of the continuation stack (`interp->conts`). See
* `eval.c` for details.
*/
tl_object *tl_new_then(tl_interp *in, void (*cfunc)(tl_interp *, tl_object *, tl_object *), tl_object *state, const char *name) {
tl_object *obj = tl_new(in);
obj->kind = TL_THEN;
obj->cfunc = cfunc;
obj->state = state;
obj->name = name ? tl_strdup(in, name) : NULL;
return obj;
}
/** Creates a new cfunction.
*
* These are typically used to expose C code to TinyLISP code. A base C
* function receives its arguments syntactically (unevaluated, as does a
* macro). For C functions which expect all arguments by value, it's usually
* better to use `tl_new_cfunc_byval` for many reasons.
*/
tl_object *_tl_new_cfunc(tl_interp *in, void (*cfunc)(tl_interp *, tl_object *, tl_object *), const char *name) {
tl_object *obj = tl_new_then(in, cfunc, TL_EMPTY_LIST, name);
obj->kind = TL_CFUNC;
return obj;
}
/** Creates a new "by-value" C function.
*
* Unlike a base C function, this version receives all of its arguments by
* value, which is what most functions expect (and the most flexible), as does
* a TinyLISP lambda.
*/
tl_object *_tl_new_cfunc_byval(tl_interp *in, void (*cfunc)(tl_interp *, tl_object *, tl_object *), const char *name) {
tl_object *obj = tl_new_then(in, cfunc, TL_EMPTY_LIST, name);
obj->kind = TL_CFUNC_BYVAL;
return obj;
}
/** Creates a new macro.
*
* This is rarely needed from C (except for tl-macro).
*/
tl_object *tl_new_macro(tl_interp *in, tl_object *args, tl_object *envn, tl_object *body, tl_object *env) {
tl_object *obj = tl_new(in);
obj->kind = envn ? TL_MACRO : TL_FUNC;
obj->args = args;
obj->body = body;
obj->env = env;
obj->envn = envn;
return obj;
}
/** Creates a new continuation.
*
* These are the objects created by call-with-current-continuation (call/cc).
*/
tl_object *tl_new_cont(tl_interp *in, tl_object *env, tl_object *conts, tl_object *values) {
tl_object *obj = tl_new(in);
obj->kind = TL_CONT;
obj->ret_env = env;
obj->ret_conts = conts;
obj->ret_values = values;
return obj;
}
/** Creates a new pointer object.
*
* These objects are thin wrappers around a C pointer, generally opaque to the
* interpreter and its language. They're primarily for extension developers to
* embed application-specific data as values that can be manipulated as a TL
* value: passed to functions, stored in lists, etc.
*
* The tag is stored in the tag field. You can allocate a tag for your own
* usage (with ::tl_is_tag) using ::tl_new_tag, or you can use the reserved
* ::TL_NO_TAG if you don't care about it. It has no semantic meaning beyond
* this use.
*
* Note that TinyLISP can easily retain values indefinitely; garbage-collection
* is only done when ::tl_gc is called, normally via tl_interp::gc_events .
* Beside that, objects can be retained in surprising ways, such as the frames
* captured by a continuation which is itself still reachable. Surrendering
* this pointer to the runtime may well require that you ensure it stays alive
* for potentially the lifetime of the entire interpreter. The only time it is
* safe to completely free its referent is in the callback given, which is
* invoked from ::tl_free . (Don't free the object itself in this callback,
* only the pointer it contains.)
*
* Storing TinyLISP values inside of a pointer, even indirectly, is a bad idea;
* there is no public-facing routine for allowing the GC mark pass to view
* inside of this pointer, and so it may eventually contain dangling
* references. If you want to expose multiple TL values, construct a list (see
* ::tl_new_pair ). Lists can safely contain pointer objects, and are
* compatible with GC.
*/
tl_object *tl_new_ptr(tl_interp *in, void *ptr, void (*gcfunc)(tl_interp *, tl_object *), tl_tag tag) {
tl_object *obj = tl_new(in);
obj->kind = TL_PTR;
obj->ptr = ptr;
obj->gcfunc = gcfunc;
obj->tag = tag;
return obj;
}
/** "Free" an object, adding it to a reclaimable freelist.
*
* TinyLISP has a tracing GC, so, as a rule, you should never need to do this.
* However, you may do this if you can prove that you are removing the last
* reference to this object, as a slight optimization. The amount of
* optimization is rarely worth the proof (since TL code can easily capture
* references you don't know about in many cases), so it's best to simply not
* use this function (except from within `tl_gc`).
*
* Bizarre behavior can happen with references to objects in the freelist, so
* we make an effort to poison them so their use causes issues early. In
* particular, this list can be destroyed with ::tl_reclaim, so any such bug is
* tantamount to a use-after-free.
*
* The underyling allocator's "free" is called from ::tl_reclaim. Under memory
* pressure (as in ::tl_new), it is best to call ::tl_reclaim immediately after
* ::tl_gc.
*/
void tl_free(tl_interp *in, tl_object *obj) {
tl_trace(free_enter, in, obj);
if(!obj) return;
if(obj->prev_alloc) {
obj->prev_alloc->next_alloc = tl_make_next_alloc(
obj->prev_alloc->next_alloc,
tl_next_alloc(obj)
);
} else {
in->top_alloc = tl_make_next_alloc(
in->top_alloc,
tl_next_alloc(obj)
);
}
if(tl_next_alloc(obj)) {
tl_next_alloc(obj)->prev_alloc = obj->prev_alloc;
}
if(in->free_alloc) in->free_alloc->prev_alloc = obj;
obj->next_alloc = in->free_alloc;
obj->prev_alloc = NULL;
in->free_alloc = obj;
tl_trace(free_exit, in, obj);
}
/** Well and truly destroys an object, free()ing its memory.
*
* This is unsafe to do to any live object, and will manifest memory unsafety
* and other undefined behavior if you do. It's best to only let ::tl_reclaim
* call this function unless you know what you're doing.
*/
void tl_destroy(tl_interp *in, tl_object *obj) {
tl_trace(destroy_enter, in, obj);
switch(obj->kind) {
case TL_CFUNC:
case TL_CFUNC_BYVAL:
case TL_THEN:
tl_alloc_free(in, obj->name);
break;
case TL_PTR:
if(obj->gcfunc) obj->gcfunc(in, obj);
break;
default:
break;
}
tl_alloc_free(in, obj);
tl_trace(destroy_exit, in, obj);
}
/** Mark an object and its descendents.
*
* When adding a new type of function, be sure that this recursively visits the
* new object type's descendent `tl_object` pointers.
*/
static void _tl_mark_pass(tl_object *obj) {
if(!obj) return;
if(tl_is_marked(obj)) return;
tl_mark(obj);
switch(obj->kind) {
case TL_INT:
case TL_SYM:
case TL_PTR:
break;
case TL_CFUNC:
case TL_CFUNC_BYVAL:
case TL_THEN:
_tl_mark_pass(obj->state);
break;
case TL_FUNC:
case TL_MACRO:
_tl_mark_pass(obj->args);
_tl_mark_pass(obj->body);
_tl_mark_pass(obj->env);
_tl_mark_pass(obj->envn);
break;
case TL_PAIR:
_tl_mark_pass(obj->first);
_tl_mark_pass(obj->next);
break;
case TL_CONT:
_tl_mark_pass(obj->ret_env);
_tl_mark_pass(obj->ret_conts);
_tl_mark_pass(obj->ret_values);
break;
default:
assert(0);
}
}
/** Perform a garbage collection pass.
*
* This calls ::tl_free on objects registered to the garbage collector (via
* ::tl_new and related) but which are no longer reachable from the roots
* (generally, all `tl_object *` in the interpreter).
*/
void tl_gc(tl_interp *in) {
tl_object *obj = in->top_alloc;
tl_object *tmp;
#ifdef GC_DEBUG
size_t freed = 0;
tl_printf(in, "gc: starts\n");
#endif
tl_trace(gc_enter, in);
while(obj) {
#if defined(GC_DEBUG) && GC_DEBUG > 0
tl_printf(in, "gc: unmark: %p %O\n", obj, obj);
#endif
tl_unmark(obj);
obj = tl_next_alloc(obj);
}
tl_trace(gc_mark_enter, in);
_tl_mark_pass(in->true_);
_tl_mark_pass(in->false_);
_tl_mark_pass(in->error);
_tl_mark_pass(in->prefixes);
_tl_mark_pass(in->env);
_tl_mark_pass(in->top_env);
_tl_mark_pass(in->current);
_tl_mark_pass(in->conts);
_tl_mark_pass(in->values);
/* One could make a list of the permanent objects during the unmark scan
* above, but making said list would either (1) require allocation, which
* the GC should NOT do, or (2) require there to be a fixed-size array set
* aside somewhere for the purpose. Neither is a preferable option, so
* we'll just settle for scanning the object list one more time.
*/
obj = in->top_alloc;
while(obj) {
if(tl_is_permanent(obj)) _tl_mark_pass(obj);
obj = tl_next_alloc(obj);
}
tl_trace(gc_mark_exit, in);
obj = in->top_alloc;
while(obj) {
tmp = obj;
obj = tl_next_alloc(obj);
if(!tl_is_marked(tmp)) {
#ifdef GC_DEBUG
freed++;
#if GC_DEBUG > 0
tl_printf(in, "gc: free: %p %O\n", tmp, tmp);
#endif
#endif
tl_free(in, tmp);
}
}
#ifdef GC_DEBUG
tl_printf(in, "gc: freed 0x%zx objects\n", freed);
#endif
tl_trace(gc_exit, in);
}
/** Reclaims memory from the interpreter.
*
* This is most effective after a GC, as it merely drops all entries in the
* freelist that ::tl_gc has assembled from unreachable objects. Having these
* objects cached is good for performance, but may be undesirable if the
* program is under memory pressure; if malloc reports this, ::tl_new calls
* this automatically. Thus, regular users shouldn't do this, as it hurts
* optimization in the usual cases.
*/
void tl_reclaim(tl_interp *in) {
tl_object *obj = in->free_alloc, *next;
while(obj) {
next = tl_next_alloc(obj);
tl_destroy(in, obj);
obj = next;
}
}
/** Returns the length of a list.
*
* This is defined as the number of iterations that would be done by \ref
* tl_list_iter . This might not be what you're expecting; for example, an
* improper list has its tail counted toward its length,
*/
size_t tl_list_len(tl_object *l) {
size_t cnt = 0;
if(!l || !tl_is_pair(l)) return cnt;
for(tl_list_iter(l, item)) cnt++;
return cnt;
}
/** Reverses a list.
*
* This builds a list by pushing elements onto a running stack (another list)
* in the order they are traversed, so it requires no extra space (beyond the
* allocation of the returned list). It is, however, linear time.
*/
tl_object *tl_list_rvs(tl_interp *in, tl_object *l) {
tl_object *res = TL_EMPTY_LIST;
for(tl_list_iter(l, item)) res = tl_new_pair(in, item, res);
return res;
}
/** Reverses a list, retaining the last item as its "improper" tail. */
tl_object *tl_list_rvs_improp(tl_interp *in, tl_object *l) {
tl_object *res = tl_first(l);
l = tl_next(l);
for(tl_list_iter(l, item)) res = tl_new_pair(in, item, res);
return res;
}
/** Copies a C string into another C string.
*
* This is an analogue to `strdup` which uses the interpreter allocator
* (tl_interp::mallocf).
*/
char *tl_strdup(tl_interp *in, const char *str) {
size_t s;
char *buf;
if(!str) return NULL;
s = strlen(str) + 1;
if(!s) return NULL;
buf = tl_alloc_malloc(in, s);
if(!buf) {
tl_gc(in);
buf = tl_alloc_malloc(in, s);
assert(buf);
}
return strcpy(buf, str);
}
/** Allocates an array of size `n` with elements of size `s`, initializing all bytes to 0.
*
* This is an analogue to `calloc` which uses the interpreter allocator
* (tl_interp::mallocf).
*/
void *tl_calloc(tl_interp *in, size_t n, size_t s) {
// FIXME: Don't multiply the two size_t's unchecked--if this overflows, bad things happen.
void *region = tl_alloc_malloc(in, n * s);
if(!region) return NULL;
return memset(region, 0, n * s);
}
/** Return a C string containing the symbol's name.
*
* This is allocated with the interpreter allocator, and needs to be freed with
* ::tl_alloc_free . It's not tracked or GC'd otherwise; a fresh copy is
* returned.
*
* On a failure, this returns NULL.
*/
char *tl_sym_to_cstr(tl_interp *in, tl_object *sym) {
if(!tl_is_sym(sym)) return NULL;
void *region = tl_alloc_malloc(in, sym->nm->here.len + 1);
if(!region) return NULL;
memcpy(region, sym->nm->here.data, sym->nm->here.len);
((char *)region)[sym->nm->here.len] = 0;
return region;
}