eval.c

#include <string.h>
#include <assert.h>

#include "tinylisp.h"

/** Push an expression to be evaluated.
 *
 * The arguments (aside from the interpreter) are the expression and the
 * environment (variable bindings) in which the expression is to be evaluated.
 * Note that the expression isn't tagged for being direct or syntactic; by
 * virtue of being submitted here, its constituents will be considered
 * syntactic.
 *
 * The return value is a TL_RESULT:
 *
 * - If `TL_RESULT_DONE`, the value is available immediately on top of the
 *   value stack.
 *
 * - If `TL_RESULT_AGAIN`, an application has been pushed to the continuation
 *   stack. When this application is done, the value will be available on top
 *   of the value stack.
 *
 * Robust code should (before calling `tl_push_eval`) simply push an
 * application that receives this value using `tl_push_apply`, and it will be
 * called at the right time--either immediately afterward, or once the
 * evaluating application has succeeded. (Note that it may also never be
 * called--if an error occurs, or a called continuation abandons this
 * computation.)
 */
int tl_push_eval(tl_interp *in, tl_object *expr, tl_object *env) {
	if(tl_has_error(in)) return TL_RESULT_DONE;
	if(!expr) {
		tl_error_set(in, tl_new_sym(in, "evaluate ()"));
		return TL_RESULT_DONE;
	}
	if(tl_is_int(expr) || tl_is_callable(expr)) {  /* Self-valuating */
		tl_values_push(in, expr);
		return TL_RESULT_DONE;
	}
	if(tl_is_sym(expr)) {  /* Variable binding */
		tl_object *binding = tl_env_get_kv(in, env, expr);
		if(!binding) {
			tl_error_set(in, tl_new_pair(in, tl_new_sym(in, "unknown var"), expr));
			return TL_RESULT_DONE;
		}
		tl_values_push(in, tl_next(binding));
		return TL_RESULT_DONE;
	}
	if(tl_is_pair(expr)) {  /* Application */
		size_t len = tl_list_len(expr);
		for(tl_list_iter(expr, subex)) {
			if(subex == tl_first(expr)) {
				tl_push_apply(in, (long) len - 1, subex, env);
			} else {
				tl_values_push_syntactic(in, subex);
			}
		}
		return TL_RESULT_AGAIN;
	}
	tl_error_set(in, tl_new_pair(in, tl_new_sym(in, "unevaluable"), expr));
	return TL_RESULT_DONE;
}

/** Push an application to the continuation stack.
 *
 * In the simple case, `expr` is a callable object, which may be a builtin
 * (`TL_CFUNC`, `TL_CFUNC_BYVAL`), a C continuation (`TL_THEN`), or any kind of
 * user function (`TL_FUNC`, `TL_MACRO`). It may also be an expression (a
 * syntax) which evaluates to any one of these, in which case the evaluation
 * will almost certainly be indirected by `tl_apply_next`--see
 * `TL_APPLY_INDIRECT`. Once the indirected apply target has been evaluated, it
 * will be called as specified below.
 *
 * In the normal case, `len` specifies the number of arguments this callable
 * will receive. This is a fixed constant, because it is assumed to have been
 * generated by parsing an application (which knows the length of the list),
 * regardless of the variadicity of the underlying apply target. (Indeed,
 * builtins and continuations have no checks on variadicity other than those
 * they implement themselves.) `env` gives the evaluation environment (variable
 * bindings) of the application, which will be available during the application
 * as tl_interp::env.
 *
 * When the apply target `expr` is reduced to a simple callable object, it is
 * invoked with the next `len` values from the value stack, and with
 * tl_interp::env being set to `env`. For by-value callables (`TL_FUNC`,
 * `TL_CFUNC_BYVAL`), the arguments are all direct (evaluated); for by-name
 * callables (`TL_MACRO`, `TL_CFUNC`), the arguments are all syntactic (and an
 * error is raised if a direct value was given). C continuations (`TL_THEN`)
 * are treated as syntactic--thus, care must be taken when using the stack to
 * communicate with them, as `tl_values_push_syntactic` must be used. (Since
 * this can be undesirable if a "synthesized" syntax escapes to be evaluated to
 * a direct value, it's recommendable to restrict most
 * continuation-to-continuation communication to their embedded state.)
 *
 * There are some special "continuation flags" that can be used in places of
 * `len`; they are all negative, and change the meaning of the parameters:
 *
 * - `TL_APPLY_PUSH_EVAL`: `expr` is evaluated, but not applied. Its value is
 *   pushed on top of the stack. This is equivalent to `tl_push_eval`, but is
 *   sequenced with respect to the continuation stack.
 *
 * - `TL_APPLY_DROP_EVAL`: Same as above, but the value isn't even pushed. This
 *   is used principally for expressions in user-defined functions which aren't
 *   in tail position.
 *
 * - `TL_APPLY_INDIRECT`: The callable is to be taken from the top of the value
 *   stack; it is an error if this object is not a callable. `expr` is instead
 *   a `TL_INT` with the number of values from the stack to apply to it (id
 *   est, it serves the same function as `len` normally does).
 *
 * - `TL_APPLY_DROP`: Drops the top value of the value stack. `expr` and `env`
 *   are ignored. This is put on the stack when a `TL_APPLY_DROP_EVAL` is
 *   indirected, thus splitting into a `TL_APPLY_DROP` under a
 *   `TL_APPLY_INDIRECT`.
 *
 * - `TL_APPLY_DROP_RESCUE`: Drops the top of the rescue stack, restoring the
 *   delegation of error handling. `expr` and `env` are ignored.
 *
 * - `TL_APPLY_GETCHAR`: Requests a character from the environment. This causes
 *   `tl_apply_next` to return the result `TL_RESULT_GETCHAR`, which signals to
 *   the host environment that a character is needed; it is expected to be
 *   provided as a `TL_INT` on top of the value stack when `tl_apply_next` is
 *   subsequently called. The default implementation of `tl_run_until_done`
 *   handles this transparently by invoking the tl_interp::readf function.
 */
void tl_push_apply(tl_interp *in, long len, tl_object *expr, tl_object *env) {
	in->conts = tl_new_pair(in, tl_new_pair(in, tl_new_int(in, len), tl_new_pair(in, expr, env)), in->conts);
	in->ctr_events++;
	if(in->gc_events > 0 && in->ctr_events >= in->gc_events) {
		tl_gc(in);
		in->ctr_events = 0;
	}
}

/** C continuation for calling a function.
 *
 * This is invoked after the value stack has been verified to be all syntactic,
 * or converted to all direct values (via `_tl_eval_all_args`). In the case of
 * built-ins or continuations, this simply calls the attached function pointer.
 * In the case of user functions, this:
 *
 * - Creates a new environment frame;
 *
 * - Binds named arguments into them, using the generalized argument rules;
 *
 * - Pushes the frame onto the environment; and
 *
 * - Pushes the user code as `PUSH_EVAL`/`DROP_EVAL` entries on the cont stack.
 */
void _tl_apply_next_body_callable_k(tl_interp *in, tl_object *args, tl_object *cont) {
	tl_object *callex = tl_first(tl_next(cont));
	tl_object *env = tl_next(tl_next(cont));
	tl_object *frm = TL_EMPTY_LIST;

	/* Handle builtins */
	if(tl_is_cfunc(callex) || tl_is_cfunc_byval(callex) || tl_is_then(callex)) {
		callex->cfunc(in, args, callex->state);
		return;
	}

	/* Determine if arguments are legal and bind them */
	if(tl_is_pair(callex->args)) {
		char is_improp = 0;
		long paramlen = 0;

		for(tl_list_iter(callex->args, item)) {
			paramlen++;
			if(tl_is_sym(tl_next(l_item))) {
				is_improp = 1;
				break;
			}
		}

		if(is_improp ? (tl_list_len(args) < paramlen) : (tl_list_len(args) != paramlen)) {
			tl_error_set(in, tl_new_pair(in, tl_new_pair(in, tl_new_sym(in, "bad arity"), tl_new_pair(in, tl_new_int(in, paramlen), callex)), args));
			tl_cfunc_return(in, in->false_);
		}

		/* Bind parameters into a new env, name by name */
		for(tl_object *acur = callex->args; acur && args; acur = tl_next(acur)) {
			frm = tl_new_pair(in, tl_new_pair(in, tl_first(acur), tl_first(args)), frm);
			args = tl_next(args);
			if(!tl_is_pair(tl_next(acur))) {
				frm = tl_new_pair(in, tl_new_pair(in, tl_next(acur), args), frm);
				break;
			}
		}
	} else if(tl_is_sym(callex->args)) {
		/* Just capture all arguments into the one name in the new frame */
		frm = tl_new_pair(in, tl_new_pair(in, callex->args, args), frm);
	} else {
		tl_error_set(in, tl_new_pair(in, tl_new_sym(in, "bad arg kind"), callex->args));
		tl_cfunc_return(in, in->false_);
	}

	/* For macros: before losing the old one, bind the env */
	if(callex->envn) frm = tl_new_pair(in, tl_new_pair(in, callex->envn, env), frm);

	/* ...and add the frame into the env, creating a new env */
	env = tl_new_pair(in, frm, callex->env);

	/* Push the body (in reverse order) onto the cont stack */
	tl_object *body_rvs = tl_list_rvs(in, callex->body);
	for(tl_list_iter(body_rvs, ex)) {
		tl_push_apply(in, ex == tl_first(body_rvs) ? TL_APPLY_PUSH_EVAL : TL_APPLY_DROP_EVAL, ex, env);
	}
}

/** Run the next step of the interpreter.
 *
 * This is the top-level entry for running a single step of a TinyLISP
 * computation. Its operation is conceptually simple: it pops the top of the
 * continuation stack (or returns DONE if it is empty), and does what that
 * entry says. (For a discussion on those entries, see `tl_push_apply`). The
 * function is complex merely because of the minutiae in handling those
 * entries, as well as the stack manipulation that must occur when an
 * indirection occurs (when `tl_push_eval` cannot provide an immediate value).
 *
 * If all goes well, the callable is resolved to a simple object, and the type
 * of this object determines whether or not arguments are to be popped by-value
 * or by-name; after this is done, it is applied by
 * `_tl_apply_next_body_callable_k`, which is called directly in the case of
 * by-name, or indirectly through `_tl_eval_all_args` in the case of
 * by-value--which incidentally pushes a C continuation on the stack to be
 * encountered in the next iteration of `tl_apply_next`.
 *
 * The return value is one of the `TL_RESULT` values:
 *
 * - `TL_RESULT_DONE`: The computation is finished. No more can be done.
 *   Subsequent calls will continue to return the same, unless any other
 *   continuations are pushed interim. If the computation was aborted in error
 *   (see `tl_has_error`), tl_interp::error is an object that describes the
 *   cause. Otherwise, the top of the value stack is usually whatever is
 *   returned by the "prime mover", the first continuation on the stack, and
 *   may be read off as the whole result of the computation (as whatever the
 *   prime mover returns).
 *
 * - `TL_RESULT_AGAIN`: The computation isn't yet finished; more calls to
 *   `tl_apply_next` are needed. Control is returned to the top-level, however,
 *   for its needs--for example, to check an event loop, or do other
 *   book-keeping. The computation can also be safely abandoned at this point.
 *
 * - `TL_RESULT_GETCHAR`: The computation needs to read an input character.
 *   Before the next invocation of `tl_apply_next`, the runtime expects to have
 *   that character on top of the stack, as a `TL_INT`. If your environment
 *   does not provide input normally, and you've already read in the program as
 *   an expression, it is acceptable to put `tl_new_int(in, EOF)` on top of the
 *   stack. Otherwise, this suspension point is similar to `TL_RESULT_AGAIN`.
 *
 * A typical implementation will invoke `tl_apply_next` until it returns
 * `TL_RESULT_DONE` (a false value). This is what `tl_run_until_done` does,
 * while additionally transparently handling `TL_RESULT_GETCHAR` through
 * tl_interp::readf.
 */
int tl_apply_next(tl_interp *in) {
	tl_object *cont = tl_first(in->conts);
	long len;
	tl_object *callex, *env, *args = TL_EMPTY_LIST;
	int res;
	/*
	tl_printf(in, "Conts: ");
	tl_print(in, in->conts);
	tl_printf(in, "\n");
	*/
	if(tl_has_error(in)) {
		tl_object *rescue = tl_rescue_peek(in);
		if(!rescue) return TL_RESULT_DONE;
		tl_rescue_drop(in);
		/* Trampoline into the rescue continuation--we could do this directly,
		 * but why reinvent the wheel? */
		tl_push_apply(in, 1, rescue, in->env);
		tl_values_push(in, in->error);
		tl_error_clear(in);
		return TL_RESULT_AGAIN;
	}
	in->current = cont;
	if(!cont) return TL_RESULT_DONE;
	in->conts = tl_next(in->conts);
	assert(tl_is_int(tl_first(cont)));
	len = tl_first(cont)->ival;
	callex = tl_first(tl_next(cont));
	env = tl_next(tl_next(cont));
#ifdef CONT_DEBUG
	tl_printf(in, "Apply Next len %ld Callex: ", len);
	tl_print(in, callex);
	tl_printf(in, " ");
#endif
	if(len == TL_APPLY_DROP) {
		in->values = tl_next(in->values);
		return TL_RESULT_AGAIN;
	}
	if(len == TL_APPLY_DROP_RESCUE) {
		tl_rescue_drop(in);
		return TL_RESULT_AGAIN;
	}
	if(len == TL_APPLY_GETCHAR) {
		if(in->is_putback) {
			tl_values_push(in, tl_new_int(in, in->putback));
			in->is_putback = 0;
			return TL_RESULT_AGAIN;
		} else {
			return TL_RESULT_GETCHAR;
		}
	}
	if(len != TL_APPLY_INDIRECT) {
		if((res = tl_push_eval(in, callex, env))) {
			if(!(len == TL_APPLY_PUSH_EVAL || len == TL_APPLY_DROP_EVAL)) {
#ifdef CONT_DEBUG
				tl_printf(in, "[indirected]\n");
#endif
				cont = tl_first(in->conts);
				in->conts = tl_next(in->conts);
				tl_push_apply(in, TL_APPLY_INDIRECT, tl_new_int(in, len), env);
				in->conts = tl_new_pair(in, cont, in->conts);
			} else if(len == TL_APPLY_DROP_EVAL) {
				cont = tl_first(in->conts);
				in->conts = tl_next(in->conts);
				tl_push_apply(in, TL_APPLY_DROP, TL_EMPTY_LIST, TL_EMPTY_LIST);
				in->conts = tl_new_pair(in, cont, in->conts);
			}
			return res;
		}
	} else {
#ifdef CONT_DEBUG
		tl_printf(in, "[resuming indirect]");
#endif
		len = tl_first(tl_next(cont))->ival;
	}
#ifdef CONT_DEBUG
	tl_printf(in, "\n");
#endif
	tl_values_pop_into(in, callex);
#ifdef CONT_DEBUG
	tl_printf(in, "Apply Next: %ld call: ", len);
	tl_print(in, callex);
	tl_printf(in, " values: ");
	tl_print(in, in->values);
	/*
	tl_printf(in, " env: ");
	tl_print(in, env);
	*/
	tl_printf(in, " stack: ");
	for(tl_list_iter(in->conts, ct)) {
		tl_print(in, tl_new_pair(in, tl_first(ct), tl_first(tl_next(ct))));
	}
	tl_printf(in, "\n");
#endif
	if(len == TL_APPLY_DROP_EVAL) return TL_RESULT_AGAIN;
	if(len == TL_APPLY_PUSH_EVAL) {
		tl_values_push(in, callex);
		return TL_RESULT_AGAIN;
	}
	if(!tl_is_callable(callex)) {
		tl_error_set(in, tl_new_pair(in, tl_new_sym(in, "call non-callable"), callex));
		return TL_RESULT_AGAIN;
	}
	for(int i = 0; i < len; i++) {
		args = tl_new_pair(in, tl_first(in->values), args);
		in->values = tl_next(in->values);
	}
	in->env = env;
	tl_object *new_args = TL_EMPTY_LIST;
	switch(callex->kind) {
		case TL_FUNC:
		case TL_CFUNC_BYVAL:
			tl_eval_all_args(in, args, tl_new_pair(in, tl_new_int(in, len), tl_new_pair(in, callex, env)), _tl_apply_next_body_callable_k);
			break;

		case TL_MACRO:
		case TL_CFUNC:
		case TL_THEN:
			for(tl_list_iter(args, arg)) {
				if(callex->kind != TL_THEN && tl_next(arg) != in->true_) {
					tl_error_set(in, tl_new_pair(in, tl_new_pair(in, tl_new_sym(in, "invoke macro/cfunc with non-syntactic arg"), callex), arg));
					return TL_RESULT_AGAIN;
				}
				new_args = tl_new_pair(in, tl_first(arg), new_args);
			}
			_tl_apply_next_body_callable_k(in, tl_list_rvs(in, new_args), tl_new_pair(in, tl_new_int(in, len), tl_new_pair(in, callex, env)));
			break;

		case TL_CONT:
			if(len != 1) {
				tl_error_set(in, tl_new_pair(in, tl_new_sym(in, "bad cont arity (1)"), args));
				return TL_RESULT_AGAIN;
			}
			in->conts = callex->ret_conts;
			in->values = callex->ret_values;
			in->env = callex->ret_env;
			if(tl_next(tl_first(args)) == in->true_) {
				tl_push_eval(in, tl_first(tl_first(args)), env);
			} else {
				tl_values_push(in, tl_first(tl_first(args)));
			}
			break;

		default:
			assert(0);
			break;
	}
	return TL_RESULT_AGAIN;
}

/** Evaluate an expr, passings its direct value to the C continuation.
 *
 * The function pointer `then` is eventually invoked with the given `state` and
 * an argument list of size 1 containing the result of evaluating `expr`, which
 * is treated as syntactic by virtue of `tl_push_eval`, as long as the
 * computation isn't abandoned.
 */
void _tl_eval_and_then(tl_interp *in, tl_object *expr, tl_object *state, void (*then)(tl_interp *, tl_object *, tl_object *), const char *name) {
	tl_object *tobj = tl_new_then(in, then, state, name);
	tl_push_apply(in, 1, tobj, in->env);
	tl_push_eval(in, expr, in->env);
}

/** Gets a character, passing it to the C continuation.
 *
 * This pushes `TL_APPLY_GETCHAR`, thus signalling the runtime with
 * `TL_RESULT_GETCHAR`, to get an input character and push it to the value
 * stack. The continuation function pointer is then invoked with the given
 * `state` and an argument list consisting of that value, so long as the
 * computation isn't abandoned.
 */
void _tl_getc_and_then(tl_interp *in, tl_object *state, void (*then)(tl_interp *, tl_object *, tl_object *), const char *name) {
	tl_object *tobj = tl_new_then(in, then, state, name);
	tl_push_apply(in, 1, tobj, in->env);
	tl_push_apply(in, TL_APPLY_GETCHAR, TL_EMPTY_LIST, TL_EMPTY_LIST);
}

/** Continuation for `_tl_eval_all_args` (see). */
void _tl_eval_all_args_k(tl_interp *in, tl_object *result, tl_object *state) {
	tl_object *args = tl_first(tl_first(tl_first(state)));
	tl_object *stack = tl_new_pair(in, tl_first(result), tl_next(tl_first(state)));
	tl_object *then = tl_next(state);
	tl_object *new_state = tl_new_pair(in,
		tl_new_pair(in,
			tl_new_pair(in, tl_next(args), TL_EMPTY_LIST),
			stack
		),
		then
	);
	if(args) {
		if(tl_next(tl_first(args)) == in->true_) {
			tl_eval_and_then(in, tl_first(tl_first(args)), new_state, _tl_eval_all_args_k);
		} else {
			tl_values_push(in, tl_first(tl_first(args)));
			tl_push_apply(in, 1, tl_new_then(in, _tl_eval_all_args_k, new_state, "_tl_apply_all_args_k<indirect>"), in->env);
		}
	} else {
		for(tl_list_iter(tl_list_rvs(in, stack), elem)) {
			tl_values_push(in, elem);
		}
		tl_push_apply(in, tl_list_len(stack), then, in->env);
	}
}

/** Evaluate all arguments, passing them to the continuation.
 * 
 * The continuation `then` is eventually invoked with a given `state`, and with
 * arguments consisting of all the evaluated forms of the values in `args`,
 * which is assumed to be a slice of the value stack--in particular, each entry
 * should be a pair (value . syntactic), where syntactic is false if the value
 * is direct (id est, already evaluated) and true if the value is syntactic
 * (needs to be evaluated). The continuation's arguments consist only of the
 * values, once each syntactic one has been directly evaluated.
 */
void _tl_eval_all_args(tl_interp *in, tl_object *args, tl_object *state, void (*then)(tl_interp *, tl_object *, tl_object *), const char *name) {
	tl_object *tobj = tl_new_then(in, then, state, name);
	if(args) {
		tl_object *state = tl_new_pair(in,
			tl_new_pair(in, 
				tl_new_pair(in, tl_next(args), TL_EMPTY_LIST),
				TL_EMPTY_LIST
			),
			tobj
		);
		if(tl_next(tl_first(args)) == in->true_) {
			tl_eval_and_then(in, tl_first(tl_first(args)), state, _tl_eval_all_args_k);
		} else {
			tl_values_push(in, tl_first(tl_first(args)));
			tl_push_apply(in, 1, tl_new_then(in, _tl_eval_all_args_k, state, "_tl_apply_all_args_k<direct>"), in->env);
		}
	} else {
		tl_push_apply(in, 0, tl_new_then(in, then, state, name), in->env);
	}
}

/** Runs an interpreter with one or more pushed evaluations until all evaulation is finished or an error occurs.
 *
 * This simply calls `tl_apply_next` in a loop until it returns
 * `TL_RESULT_DONE`, handing `TL_RESULT_GETCHAR` by using the interpreter's
 * tl_interp::readf.
 */
void tl_run_until_done(tl_interp *in) {
	int res;
	while((res = tl_apply_next(in))) {
		switch(res) {
			case TL_RESULT_AGAIN:
				break;

			case TL_RESULT_GETCHAR:
				tl_values_push(in, tl_new_int(in, (long)tl_getc(in)));
				break;

			default:
				assert(0);
				break;
		}
	}
}