diff --git a/examples/strings.hal b/examples/strings.hal
new file mode 100644
index 0000000..a31b9cb
--- /dev/null
+++ b/examples/strings.hal
@@ -0,0 +1,11 @@
+---
+globals: 0
+global_pointers: 0
+---
+:0 { args: 0 ptr_args: 0 locals: 0 local_pointers: 0 } {
+    PushLiteralString "Hello,";
+    PushLiteralString " World!";
+    ConcatStrings;
+    PrintString;
+    Exit;
+}
diff --git a/include/assembler/lexer.h b/include/assembler/lexer.h
index 12cd364..a31832e 100644
--- a/include/assembler/lexer.h
+++ b/include/assembler/lexer.h
@@ -36,8 +36,12 @@ typedef enum
     TOKEN_ModI64,
     TOKEN_Call,
     TOKEN_PrintTopStackI64,
+    TOKEN_PushLiteralString,
+    TOKEN_ConcatStrings,
+    TOKEN_PrintString,
     TOKEN_Exit,
     TOKEN_NUMBER,
+    TOKEN_STRING,
     TOKEN_COLON,
     TOKEN_SEMICOLON,
     TOKEN_HASHTAG,
diff --git a/include/hal64.h b/include/hal64.h
index ffe1d38..401c73e 100644
--- a/include/hal64.h
+++ b/include/hal64.h
@@ -30,23 +30,35 @@ typedef enum
     OP_MOD_I64,
     OP_CALL,
     OP_PRINT_TOP_STACK_I64,
+    OP_PUSH_LITERAL_STRING,
+    OP_CONCAT_STRINGS,
+    OP_PRINT_STRING,
     OP_EXIT,
 } InstructionOp;
 
 typedef struct
 {
     InstructionOp op;
-    union {
+    union
+    {
         uint64_t immediate;
         size_t reg;
-        struct {
+        struct
+        {
             size_t reg;
             uint64_t immediate;
         } ri;
-        struct {
+        struct
+        {
             size_t reg1;
             size_t reg2;
         } rr;
+        /* String literal operand of OP_PUSH_LITERAL_STRING. */
+        struct
+        {
+            char *ptr;
+            size_t size;
+        } string;
     } data;
 } Instruction;
 
@@ -72,13 +84,32 @@ typedef struct
 
+/* Heap allocation registered in VM.objects; `marked` is presumably a GC mark bit (unused so far). */
 typedef struct
 {
-    uint64_t *call_stack;
-    uint64_t *operands_stack;
+    uint8_t marked;
+    size_t size;
+    void *data;
+} HeapObject;
+
+typedef struct {
+    uint64_t *data;
+    size_t size;
+    size_t capacity;
+} Array;
+
+typedef struct {
+    HeapObject *data;
+    size_t size;
+    size_t capacity;
+} PointersArray;
+
+typedef struct
+{
+    Array call_stack;
+    Array operands_stack;
+    PointersArray pointers_stack;
+    PointersArray objects;
     uint64_t *locals;
-    size_t call_stack_size;
-    size_t call_stack_capacity;
-    size_t operands_stack_size;
-    size_t operands_stack_capacity;
+    size_t allocated_heap_size;
 } VM;
 
 Program init_program(void);
diff --git a/src/assembler/assemble.c b/src/assembler/assemble.c
index 2ab1e49..914f13c 100644
--- a/src/assembler/assemble.c
+++ b/src/assembler/assemble.c
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include <string.h>
 #include "assembler/assembler.h"
 #include "assembler/lexer.h"
 #include "utils/memory.h"
@@ -167,6 +168,19 @@ read_literal_number()
     return token;
 }
+/* Reads the next token and exits with an error unless it is a string literal. */
+static Token
+read_literal_string()
+{
+    Token token;
+    token = read_token();
+    if (token.type != TOKEN_STRING) {
+        fprintf(stderr, "Expected string, got %s\n", token.value);
+        exit(EXIT_FAILURE);
+    }
+    return token;
+}
+
 static void
 read_ri(Instruction *instruction)
 {
     Token token;
@@ -297,6 +311,22 @@ read_instruction(Instruction *instruction)
     case TOKEN_PrintTopStackI64:
         instruction->op = OP_PRINT_TOP_STACK_I64;
         break;
+    case TOKEN_PushLiteralString:
+        instruction->op = OP_PUSH_LITERAL_STRING;
+        token = read_literal_string();
+        instruction->data.string.ptr = strdup(token.value);
+        if (instruction->data.string.ptr == NULL) {
+            fprintf(stderr, "Out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        instruction->data.string.size = strlen(token.value);
+        break;
+    case TOKEN_ConcatStrings:
+        instruction->op = OP_CONCAT_STRINGS;
+        break;
+    case TOKEN_PrintString:
+        instruction->op = OP_PRINT_STRING;
+        break;
     case TOKEN_Exit:
         instruction->op = OP_EXIT;
         break;
diff --git a/src/hal64.c b/src/hal64.c
index bab23b5..f83bf9e 100644
--- a/src/hal64.c
+++ b/src/hal64.c
@@ -91,6 +91,15 @@ instruction_as_string(Instruction instruction, char *string, size_t max_length)
     case OP_PRINT_TOP_STACK_I64:
         snprintf(string, max_length, "PRINT_TOP_STACK_I64");
         break;
+    case OP_PUSH_LITERAL_STRING:
+        snprintf(string, max_length, "PUSH_LITERAL_STRING \"%s\"", instruction.data.string.ptr);
+        break;
+    case OP_CONCAT_STRINGS:
+        snprintf(string, max_length, "CONCAT_STRINGS");
+        break;
+    case OP_PRINT_STRING:
+        snprintf(string, max_length, "PRINT_STRING");
+        break;
     default:
         snprintf(string, max_length, "UNKNOWN");
         break;
diff --git a/src/lexer.l b/src/lexer.l
index cd8ccac..eb60edc 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -8,6 +8,7 @@
 %%
 
 [ \t\n]+ { /* ignore whitespace */ }
+\"([^\\"]|\\.)*\" { return TOKEN_STRING;}
 [0-9]+ { return TOKEN_NUMBER; }
 "---" { return TOKEN_HEADER_SEPARATOR; }
 "globals" { return TOKEN_GLOBALS; }
@@ -46,6 +47,9 @@
 "ModI64" { return TOKEN_ModI64; }
 "Call" { return TOKEN_Call; }
 "PrintTopStackI64" { return TOKEN_PrintTopStackI64; }
+"PushLiteralString" { return TOKEN_PushLiteralString; }
+"ConcatStrings" { return TOKEN_ConcatStrings; }
+"PrintString" { return TOKEN_PrintString; }
 "Exit" { return TOKEN_Exit; }
 
 %%
@@ -70,6 +74,11 @@ Token read_token(void) {
         token.value[0] = '\0';
         return token;
     }
+    if (token.type == TOKEN_STRING) {
+        memcpy(token.value, yytext + 1, yyleng - 2);
+        token.value[yyleng - 2] = '\0';
+        return token;
+    }
     strcpy(token.value, yytext);
     return token;
 }
diff --git a/src/vm.c b/src/vm.c
index 8337c73..01569fb 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include <string.h>
 #include "hal64.h"
 #include "utils/memory.h"
 
@@ -7,55 +8,105 @@ VM
 init_vm(void)
 {
     VM vm;
-    vm.call_stack_size = 0;
-    vm.operands_stack_size = 0;
-    vm.call_stack_capacity = 1024;
-    vm.operands_stack_capacity = 1024;
-    vm.call_stack = safe_malloc(vm.call_stack_capacity * sizeof(uint64_t));
-    vm.operands_stack = safe_malloc(vm.operands_stack_capacity * sizeof(uint64_t));
+    vm.call_stack.size = 0;
+    vm.operands_stack.size = 0;
+    vm.pointers_stack.size = 0;
+    vm.objects.size = 0;
+    vm.allocated_heap_size = 0;
+    vm.call_stack.capacity = 1024;
+    vm.operands_stack.capacity = 1024;
+    vm.pointers_stack.capacity = 1024;
+    vm.objects.capacity = 1024;
+    vm.call_stack.data = safe_malloc(vm.call_stack.capacity * sizeof(uint64_t));
+    vm.operands_stack.data = safe_malloc(vm.operands_stack.capacity * sizeof(uint64_t));
+    vm.pointers_stack.data = safe_malloc(vm.pointers_stack.capacity * sizeof(HeapObject));
+    vm.objects.data = safe_malloc(vm.objects.capacity * sizeof(HeapObject));
     return vm;
 }
 
 void
 free_vm(VM vm)
 {
-    free(vm.call_stack);
-    free(vm.operands_stack);
+    size_t i;
+
+    /* Every heap object is registered in vm.objects; release the payloads first. */
+    for (i = 0; i < vm.objects.size; i++)
+        free(vm.objects.data[i].data);
+    free(vm.objects.data);
+    free(vm.pointers_stack.data);
+    free(vm.call_stack.data);
+    free(vm.operands_stack.data);
+}
+
+/* Allocates a heap object, registers it in vm->objects and returns a copy of its descriptor. */
+static HeapObject
+new_heap_object(VM *vm, size_t size)
+{
+    HeapObject object;
+    object.size = size;
+    object.data = safe_malloc(size * sizeof(uint64_t));
+    object.marked = 0;
+    if (vm->objects.size >= vm->objects.capacity) {
+        vm->objects.capacity *= 2;
+        vm->objects.data = safe_realloc(vm->objects.data, vm->objects.capacity * sizeof(HeapObject));
+    }
+    vm->objects.data[vm->objects.size++] = object;
+    vm->allocated_heap_size += size * sizeof(uint64_t);
+    return object;
 }
 
 static void
 push_stack(VM *vm, uint64_t value)
 {
-    if (vm->operands_stack_size >= vm->operands_stack_capacity) {
-        vm->operands_stack_capacity *= 2;
-        vm->operands_stack = safe_realloc(vm->operands_stack, vm->operands_stack_capacity * sizeof(uint64_t));
+    if (vm->operands_stack.size >= vm->operands_stack.capacity) {
+        vm->operands_stack.capacity *= 2;
+        vm->operands_stack.data = safe_realloc(vm->operands_stack.data, vm->operands_stack.capacity * sizeof(uint64_t));
+    }
+    vm->operands_stack.data[vm->operands_stack.size++] = value;
+}
+
+/* Pushes a heap object descriptor, doubling the pointers stack when it is full. */
+static void
+push_pointer_stack(VM *vm, HeapObject value)
+{
+    if (vm->pointers_stack.size >= vm->pointers_stack.capacity) {
+        vm->pointers_stack.capacity *= 2;
+        vm->pointers_stack.data =
+            safe_realloc(vm->pointers_stack.data, vm->pointers_stack.capacity * sizeof(HeapObject));
     }
-    vm->operands_stack[vm->operands_stack_size++] = value;
+    vm->pointers_stack.data[vm->pointers_stack.size++] = value;
 }
 
 static uint64_t
 pop_stack(VM *vm)
 {
-    return vm->operands_stack[--vm->operands_stack_size];
+    return vm->operands_stack.data[--vm->operands_stack.size];
+}
+
+/* Pops the top heap object descriptor; performs no underflow check. */
+static HeapObject
+pop_pointer_stack(VM *vm)
+{
+    return vm->pointers_stack.data[--vm->pointers_stack.size];
 }
 
 static uint64_t
 top_stack(VM *vm)
 {
-    return vm->operands_stack[vm->operands_stack_size - 1];
+    return vm->operands_stack.data[vm->operands_stack.size - 1];
 }
 
 static size_t
 get_stack_frame_size(VM *vm)
 {
-    return vm->call_stack[vm->call_stack_size - 1];
+    return vm->call_stack.data[vm->call_stack.size - 1];
 }
 
 static void
 pop_stack_frame(VM *vm)
 {
-    vm->call_stack_size -= get_stack_frame_size(vm);
-    vm->locals = vm->call_stack + vm->call_stack_size - get_stack_frame_size(vm);
+    vm->call_stack.size -= get_stack_frame_size(vm);
+    vm->locals = vm->call_stack.data + vm->call_stack.size - get_stack_frame_size(vm);
 }
 
 static void
@@ -64,16 +115,16 @@ call_function(VM *vm, const Program *program, size_t current_function, size_t cu
     uint64_t i;
     Function function = program->functions[next_function];
 
-    if (vm->call_stack_size + function.stack_frame_size >= vm->call_stack_capacity) {
-        vm->call_stack_capacity *= 2;
-        vm->call_stack = safe_realloc(vm->call_stack, vm->call_stack_capacity * sizeof(uint64_t));
+    while (vm->call_stack.size + function.stack_frame_size >= vm->call_stack.capacity) {
+        vm->call_stack.capacity *= 2;
+        vm->call_stack.data = safe_realloc(vm->call_stack.data, vm->call_stack.capacity * sizeof(uint64_t));
     }
 
-    vm->locals = vm->call_stack + vm->call_stack_size;
-    vm->call_stack_size += function.stack_frame_size;
-    vm->call_stack[vm->call_stack_size - 1] = function.stack_frame_size;
-    vm->call_stack[vm->call_stack_size - 2] = current_instruction;
-    vm->call_stack[vm->call_stack_size - 3] = current_function;
+    vm->locals = vm->call_stack.data + vm->call_stack.size;
+    vm->call_stack.size += function.stack_frame_size;
+    vm->call_stack.data[vm->call_stack.size - 1] = function.stack_frame_size;
+    vm->call_stack.data[vm->call_stack.size - 2] = current_instruction;
+    vm->call_stack.data[vm->call_stack.size - 3] = current_function;
 
     for (i = function.args_count - 1; i != -1; i--)
         vm->locals[i] = pop_stack(vm);
@@ -86,13 +137,12 @@ execute_program(Program program)
     char buff[256];
     Function *func = program.functions;
     Instruction *instr;
-    uint64_t a, b;
 
-    vm.call_stack_size = func->stack_frame_size;
-    vm.locals = vm.call_stack;
-    vm.call_stack[vm.call_stack_size - 1] = vm.call_stack_size;
-    vm.call_stack[vm.call_stack_size - 2] = 0;
-    vm.call_stack[vm.call_stack_size - 3] = 0;
+    vm.call_stack.size = func->stack_frame_size;
+    vm.locals = vm.call_stack.data;
+    vm.call_stack.data[vm.call_stack.size - 1] = vm.call_stack.size;
+    vm.call_stack.data[vm.call_stack.size - 2] = 0;
+    vm.call_stack.data[vm.call_stack.size - 3] = 0;
     for (instr = func->instructions;; instr++) {
         switch (instr->op) {
         case OP_PUSH_I64:
@@ -111,8 +161,8 @@ execute_program(Program program)
             push_stack(&vm, vm.locals[instr->data.ri.reg] - instr->data.ri.immediate);
             break;
         case OP_SUB_I64: {
-            b = pop_stack(&vm);
-            a = pop_stack(&vm);
+            uint64_t b = pop_stack(&vm);
+            uint64_t a = pop_stack(&vm);
             push_stack(&vm, a - b);
         }
         break;
@@ -120,14 +170,14 @@ execute_program(Program program)
             push_stack(&vm, pop_stack(&vm) * pop_stack(&vm));
             break;
         case OP_DIV_I64: {
-            b = pop_stack(&vm);
-            a = pop_stack(&vm);
+            uint64_t b = pop_stack(&vm);
+            uint64_t a = pop_stack(&vm);
             push_stack(&vm, a / b);
         }
         break;
         case OP_MOD_I64: {
-            b = pop_stack(&vm);
-            a = pop_stack(&vm);
+            uint64_t b = pop_stack(&vm);
+            uint64_t a = pop_stack(&vm);
             push_stack(&vm, a % b);
         }
         break;
@@ -165,11 +215,33 @@ execute_program(Program program)
             instr = func->instructions - 1;
             break;
         case OP_RETURN: {
-            func = program.functions + vm.call_stack[vm.call_stack_size - 3];
-            instr = func->instructions + vm.call_stack[vm.call_stack_size - 2];
+            func = program.functions + vm.call_stack.data[vm.call_stack.size - 3];
+            instr = func->instructions + vm.call_stack.data[vm.call_stack.size - 2];
             pop_stack_frame(&vm);
         }
         break;
+        case OP_PUSH_LITERAL_STRING: {
+            HeapObject object = new_heap_object(&vm, instr->data.string.size);
+            memcpy(object.data, instr->data.string.ptr, instr->data.string.size);
+            push_pointer_stack(&vm, object);
+        }
+        break;
+        case OP_CONCAT_STRINGS: {
+            HeapObject b = pop_pointer_stack(&vm);
+            HeapObject a = pop_pointer_stack(&vm);
+            HeapObject object = new_heap_object(&vm, a.size + b.size);
+            memcpy(object.data, a.data, a.size);
+            memcpy((char *)object.data + a.size, b.data, b.size);
+            push_pointer_stack(&vm, object);
+        }
+        break;
+        case OP_PRINT_STRING: {
+            HeapObject object = pop_pointer_stack(&vm);
+            size_t i;
+            for (i = 0; i < object.size; i++)
+                putchar(((char *)object.data)[i]);
+        }
+        break;
         default:
             instruction_as_string(*instr, buff, 256);
             fprintf(stderr, "Unknown instruction: %s\n", buff);