From 099b502e6b1e100dc85b02170861c927a6895d12 Mon Sep 17 00:00:00 2001 From: tyfkda Date: Fri, 5 Apr 2024 09:17:01 +0900 Subject: [PATCH 1/2] Specify include directory by source folder --- Makefile | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 01b8de067..5bee24566 100644 --- a/Makefile +++ b/Makefile @@ -35,8 +35,7 @@ CC1_BE_DIR:=$(CC1_DIR)/backend OPTIMIZE:=-O2 -g3 CFLAGS:=-ansi -std=c11 -pedantic -MMD -Wall -Wextra -Werror -Wold-style-definition \ -Wno-missing-field-initializers -Wno-empty-body \ - -D_DEFAULT_SOURCE $(OPTIMIZE) \ - -I$(CC1_FE_DIR) -I$(CC1_BE_DIR) -I$(CC1_ARCH_DIR) -I$(UTIL_DIR) + -D_DEFAULT_SOURCE $(OPTIMIZE) -I$(UTIL_DIR) ifneq ("$(NO_FLONUM)","") CFLAGS+=-D__NO_FLONUM endif @@ -90,6 +89,13 @@ as_SRCS:=$(wildcard $(AS_DIR)/*.c) \ ld_SRCS:=$(wildcard $(LD_DIR)/*.c) $(UTIL_DIR)/archive.c \ $(UTIL_DIR)/gen_section.c $(UTIL_DIR)/util.c $(UTIL_DIR)/elfutil.c $(UTIL_DIR)/table.c +src_cc_CFLAGS:=-I$(CC1_FE_DIR) -I$(CC1_BE_DIR) -I$(CC1_ARCH_DIR) # arch required for builtin.c +src_cc_frontend_CFLAGS:=-I$(CC1_FE_DIR) +src_cc_backend_CFLAGS:=-I$(CC1_FE_DIR) -I$(CC1_BE_DIR) -I$(CC1_ARCH_DIR) +src_cc_arch_$(ARCHTYPE)_CFLAGS:=-I$(CC1_FE_DIR) -I$(CC1_BE_DIR) +src_cpp_CFLAGS:=-I$(CC1_FE_DIR) +src__debug_CFLAGS:=-I$(CC1_FE_DIR) -I$(CC1_BE_DIR) + .PHONY: all all: exes libs @@ -112,7 +118,9 @@ $(foreach D, $(EXES), $(eval $(call DEFINE_EXE_TARGET,$(D)))) define DEFINE_OBJ_TARGET $(OBJ_DIR)/%.o: $(1)/%.c $(PARENT_DEPS) @mkdir -p $(OBJ_DIR) - $(CC) $(CFLAGS) -DXCC_TARGET_ARCH=XCC_ARCH_$(ARCHTYPE_UPPER) -c -o $$@ $$< + $(CC) $(CFLAGS) -DXCC_TARGET_ARCH=XCC_ARCH_$(ARCHTYPE_UPPER) \ + $$($(subst /,_,$(1))_CFLAGS) \ + -c -o $$@ $$< endef XCC_SRC_DIRS:=$(XCC_DIR) $(CC1_FE_DIR) $(CC1_BE_DIR) $(CC1_DIR) $(CC1_ARCH_DIR) $(CPP_DIR) \ $(AS_DIR) $(LD_DIR) $(UTIL_DIR) $(DEBUG_DIR) @@ -190,7 +198,7 @@ WCC_OBJ_DIR:=obj/wcc WCC_LIB_DIR:=lib WCC_DIR:=src/wcc -WCC_CFLAGS:=$(CFLAGS) -I$(CPP_DIR) +WCC_CFLAGS:=$(CFLAGS) 
-I$(CPP_DIR) -I$(CC1_FE_DIR) ifneq ("$(HOST_TARGET)","") # Self hosting From d2af408101de9d67ec06cf8a133b620ee66a041c Mon Sep 17 00:00:00 2001 From: tyfkda Date: Sun, 7 Apr 2024 11:24:58 +0900 Subject: [PATCH 2/2] Prepare to assemble more than x64: aarch64 and riscv64 --- Makefile | 6 +- include/elf.h | 10 + src/as/arch/aarch64/asm_code.c | 101 ++++ src/as/arch/aarch64/inst.h | 58 ++ src/as/arch/aarch64/ir_asm.c | 129 +++++ src/as/arch/aarch64/parse_aarch64.c | 187 +++++++ src/as/arch/riscv64/asm_code.c | 140 +++++ src/as/arch/riscv64/inst.h | 46 ++ src/as/arch/riscv64/ir_asm.c | 129 +++++ src/as/arch/riscv64/parse_riscv64.c | 196 +++++++ src/as/{asm_x86.c => arch/x64/asm_code.c} | 4 +- src/as/{ => arch/x64}/inst.h | 154 +----- src/as/{ => arch/x64}/ir_asm.c | 74 +-- src/as/arch/x64/parse_x64.c | 434 +++++++++++++++ src/as/as.c | 11 +- src/as/{asm_x86.h => asm_code.h} | 2 +- src/as/ir_asm.h | 2 +- src/as/parse_asm.c | 621 +++------------------- src/as/parse_asm.h | 71 ++- src/cc/arch/riscv64/ir_riscv64.c | 4 +- src/ld/ld.c | 7 +- src/util/elfutil.c | 4 +- src/util/elfutil.h | 2 +- 23 files changed, 1623 insertions(+), 769 deletions(-) create mode 100644 src/as/arch/aarch64/asm_code.c create mode 100644 src/as/arch/aarch64/inst.h create mode 100644 src/as/arch/aarch64/ir_asm.c create mode 100644 src/as/arch/aarch64/parse_aarch64.c create mode 100644 src/as/arch/riscv64/asm_code.c create mode 100644 src/as/arch/riscv64/inst.h create mode 100644 src/as/arch/riscv64/ir_asm.c create mode 100644 src/as/arch/riscv64/parse_riscv64.c rename src/as/{asm_x86.c => arch/x64/asm_code.c} (99%) rename src/as/{ => arch/x64}/inst.h (62%) rename src/as/{ => arch/x64}/ir_asm.c (84%) create mode 100644 src/as/arch/x64/parse_x64.c rename src/as/{asm_x86.h => asm_code.h} (95%) diff --git a/Makefile b/Makefile index 5bee24566..b39e06fec 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,7 @@ ifeq ("$(ARCHTYPE)", "") endif ARCHTYPE_UPPER:=$(shell echo "$(ARCHTYPE)" | tr \'[a-z]\' \'[A-Z]\') 
+AS_ARCH_DIR:=$(AS_DIR)/arch/$(ARCHTYPE) CC1_ARCH_DIR:=$(CC1_DIR)/arch/$(ARCHTYPE) CC1_FE_DIR:=$(CC1_DIR)/frontend CC1_BE_DIR:=$(CC1_DIR)/backend @@ -85,10 +86,13 @@ cc1_SRCS:=$(wildcard $(CC1_FE_DIR)/*.c) $(wildcard $(CC1_BE_DIR)/*.c) $(wildcard cpp_SRCS:=$(wildcard $(CPP_DIR)/*.c) \ $(CC1_DIR)/lexer.c $(UTIL_DIR)/util.c $(UTIL_DIR)/table.c as_SRCS:=$(wildcard $(AS_DIR)/*.c) \ + $(wildcard $(AS_ARCH_DIR)/*.c) \ $(UTIL_DIR)/gen_section.c $(UTIL_DIR)/util.c $(UTIL_DIR)/elfutil.c $(UTIL_DIR)/table.c ld_SRCS:=$(wildcard $(LD_DIR)/*.c) $(UTIL_DIR)/archive.c \ $(UTIL_DIR)/gen_section.c $(UTIL_DIR)/util.c $(UTIL_DIR)/elfutil.c $(UTIL_DIR)/table.c +src_as_CFLAGS:=-I$(AS_DIR) -I$(AS_ARCH_DIR) +src_as_arch_$(ARCHTYPE)_CFLAGS:=-I$(AS_DIR) -I$(AS_ARCH_DIR) src_cc_CFLAGS:=-I$(CC1_FE_DIR) -I$(CC1_BE_DIR) -I$(CC1_ARCH_DIR) # arch required for builtin.c src_cc_frontend_CFLAGS:=-I$(CC1_FE_DIR) src_cc_backend_CFLAGS:=-I$(CC1_FE_DIR) -I$(CC1_BE_DIR) -I$(CC1_ARCH_DIR) @@ -123,7 +127,7 @@ $(OBJ_DIR)/%.o: $(1)/%.c $(PARENT_DEPS) -c -o $$@ $$< endef XCC_SRC_DIRS:=$(XCC_DIR) $(CC1_FE_DIR) $(CC1_BE_DIR) $(CC1_DIR) $(CC1_ARCH_DIR) $(CPP_DIR) \ - $(AS_DIR) $(LD_DIR) $(UTIL_DIR) $(DEBUG_DIR) + $(AS_DIR) $(AS_ARCH_DIR) $(LD_DIR) $(UTIL_DIR) $(DEBUG_DIR) $(foreach D, $(XCC_SRC_DIRS), $(eval $(call DEFINE_OBJ_TARGET,$(D)))) .PHONY: test diff --git a/include/elf.h b/include/elf.h index 4b0d8ae6e..7161104c3 100644 --- a/include/elf.h +++ b/include/elf.h @@ -65,6 +65,16 @@ struct proghdr { #define ET_REL (1) // Relocatable file #define ET_EXEC (2) // Executable file +// Flags +// RISC-V +#define EF_RISCV_RVC (0x01) +#define EF_RISCV_FLOAT_ABI_SOFT (0x00) +#define EF_RISCV_FLOAT_ABI_SINGLE (0x02) +#define EF_RISCV_FLOAT_ABI_DOUBLE (0x04) +#define EF_RISCV_FLOAT_ABI_QUAD (0x06) +#define EF_RISCV_RVE (0x08) +#define EF_RISCV_TSO (0x10) + // Values for Proghdr type #define PT_LOAD (1) diff --git a/src/as/arch/aarch64/asm_code.c b/src/as/arch/aarch64/asm_code.c new file mode 100644 index 
000000000..0752675ea --- /dev/null +++ b/src/as/arch/aarch64/asm_code.c @@ -0,0 +1,101 @@ +#include "../../../config.h" +#include "asm_code.h" + +#include +#include // memcpy + +#include "inst.h" +#include "parse_asm.h" +#include "util.h" + +#ifndef MAKE_CODE16 +#define MAKE_CODE16(inst, code, ...) do { unsigned short buf[] = {__VA_ARGS__}; make_code16(inst, code, buf, sizeof(buf)); } while (0) +#endif + +#ifndef MAKE_CODE32 +#define MAKE_CODE32(inst, code, ...) do { unsigned int buf[] = {__VA_ARGS__}; make_code32(inst, code, buf, sizeof(buf)); } while (0) +#endif + +void make_code16(Inst *inst, Code *code, unsigned short *buf, int len) { + assert(len <= (int)sizeof(code->buf)); + code->inst = inst; + code->len = len; + memcpy(code->buf, buf, len); +} + +void make_code32(Inst *inst, Code *code, unsigned int *buf, int len) { + assert(len <= (int)sizeof(code->buf)); + code->inst = inst; + code->len = len; + memcpy(code->buf, buf, len); +} + +inline bool assemble_error(const ParseInfo *info, const char *message) { + parse_error(info, message); + return false; +} + +static unsigned char *asm_noop(Inst *inst, Code *code) { + UNUSED(inst); + unsigned char *p = code->buf; + return p; +} + +static unsigned char *asm_mov(Inst *inst, Code *code) { + Operand *opr1 = &inst->opr1, *opr2 = &inst->opr2; + if (opr2->immediate < 0 || opr2->immediate > 0xffff) return NULL; // MOVZ holds an unsigned imm16 only; NULL makes assemble_inst report an error instead of emitting corrupted opcode bits + uint32_t x = 0x52800000U | (opr1->reg.size == REG64 ? 
(1U << 31) : 0U) | ((uint32_t)opr2->immediate << 5) | opr1->reg.no; + MAKE_CODE32(inst, code, x); + return code->buf; +} + +static unsigned char *asm_ret(Inst *inst, Code *code) { + MAKE_CODE32(inst, code, 0xd65f03c0); + return code->buf; +} + +//////////////////////////////////////////////// + +typedef unsigned char *(*AsmInstFunc)(Inst *inst, Code *code); +typedef struct { + AsmInstFunc func; + enum OperandType opr1_type; + enum OperandType opr2_type; + enum OperandType opr3_type; + int flag; +} AsmInstTable; + +static const AsmInstTable *table[] = { + [NOOP] = (const AsmInstTable[]){ {asm_noop, NOOPERAND, NOOPERAND, NOOPERAND}, {NULL} }, + [MOV] = (const AsmInstTable[]){ {asm_mov, REG, IMMEDIATE, NOOPERAND}, {NULL} }, + [RET] = (const AsmInstTable[]){ {asm_ret, NOOPERAND, NOOPERAND, NOOPERAND}, {NULL} }, +}; + +void assemble_inst(Inst *inst, const ParseInfo *info, Code *code) { + code->flag = 0; + code->len = 0; + + const AsmInstTable *pt = NULL; + if (inst->op < (enum Opcode)ARRAY_SIZE(table) && table[inst->op] != NULL) { + for (const AsmInstTable *p = table[inst->op]; p->func != NULL; ++p) { + if (inst->opr1.type == p->opr1_type && inst->opr2.type == p->opr2_type && inst->opr3.type == p->opr3_type) { + pt = p; + break; + } + } + } + + if (pt != NULL) { + unsigned char *p = (*pt->func)(inst, code); + if (p != NULL) { + if (p > code->buf) { + code->inst = inst; + code->len = p - code->buf; + assert((size_t)code->len <= sizeof(code->buf)); + } + return; + } + } + + assemble_error(info, "Illegal opeand"); +} diff --git a/src/as/arch/aarch64/inst.h b/src/as/arch/aarch64/inst.h new file mode 100644 index 000000000..cfddd4262 --- /dev/null +++ b/src/as/arch/aarch64/inst.h @@ -0,0 +1,58 @@ +// aarch64 Instruction + +#pragma once + +#include // int64_t + +typedef struct Expr Expr; + +// Must match the order with kOpTable in parse_aarch64.c +enum Opcode { + NOOP, + MOV, + RET, +}; + +enum RegType { + NOREG, + + // 32bit + W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, 
W13, W14, W15, + W16, W17, W18, W19, W20, W21, W22, W23, W24, W25, W26, W27, W28, W29, W30, W31, + + // 64bit + X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, + X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, //X29, X30, X31, + FP, LR, SP, +}; + +enum RegSize { + REG32, + REG64, +}; + +typedef struct { + char size; // RegSize + char no; // 0~31 +} Reg; + +enum OperandType { + NOOPERAND, + REG, // reg + IMMEDIATE, // 1234 +}; + +typedef struct { + enum OperandType type; + union { + Reg reg; + int64_t immediate; + }; +} Operand; + +typedef struct Inst { + enum Opcode op; + Operand opr1; + Operand opr2; + Operand opr3; +} Inst; diff --git a/src/as/arch/aarch64/ir_asm.c b/src/as/arch/aarch64/ir_asm.c new file mode 100644 index 000000000..de827e24a --- /dev/null +++ b/src/as/arch/aarch64/ir_asm.c @@ -0,0 +1,129 @@ +#include "../../../config.h" +#include "ir_asm.h" + +#include "gen_section.h" +#include "table.h" +#include "util.h" + +static LabelInfo *new_label(int section, uintptr_t address) { + LabelInfo *info = malloc_or_die(sizeof(*info)); + info->section = section; + info->flag = 0; + info->address = address; + info->kind = LK_NONE; + return info; +} + +LabelInfo *add_label_table(Table *label_table, const Name *label, int section, bool define, bool global) { + LabelInfo *info = table_get(label_table, label); + if (info != NULL) { + if (define) { + if ((info->flag & LF_DEFINED) != 0) { + fprintf(stderr, "`%.*s' already defined\n", NAMES(label)); + return NULL; + } + info->address = 1; + info->section = section; + } + } else { + info = new_label(section, 0); + table_put(label_table, label, info); + } + if (define) + info->flag |= LF_DEFINED; + if (global) + info->flag |= LF_GLOBAL; + return info; +} + +IR *new_ir_label(const Name *label) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_LABEL; + ir->label = label; + return ir; +} + +IR *new_ir_code(const Code *code) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = 
IR_CODE; + ir->code = *code; + return ir; +} + +IR *new_ir_data(const void *data, size_t size) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_DATA; + ir->data.len = size; + ir->data.buf = (unsigned char*)data; + return ir; +} + +IR *new_ir_bss(size_t size) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_BSS; + ir->bss = size; + return ir; +} + +IR *new_ir_align(int align) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_ALIGN; + ir->align = align; + return ir; +} + +IR *new_ir_expr(enum IrKind kind, const Expr *expr) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = kind; + ir->expr = expr; + return ir; +} + +bool calc_label_address(uintptr_t start_address, Vector **section_irs, Table *label_table) { + UNUSED(start_address); + UNUSED(section_irs); + UNUSED(label_table); + return true; +} + +bool resolve_relative_address(Vector **section_irs, Table *label_table, Vector *unresolved) { + UNUSED(section_irs); + UNUSED(label_table); + UNUSED(unresolved); + return true; +} + +void emit_irs(Vector **section_irs) { + for (int sec = 0; sec < SECTION_COUNT; ++sec) { + Vector *irs = section_irs[sec]; + for (int i = 0, len = irs->len; i < len; ++i) { + IR *ir = irs->data[i]; + switch (ir->kind) { + case IR_LABEL: + break; + case IR_CODE: + add_code(ir->code.buf, ir->code.len); + break; + case IR_DATA: + add_section_data(sec, ir->data.buf, ir->data.len); + break; + case IR_BSS: + add_bss(ir->bss); + break; + case IR_ALIGN: + align_section_size(sec, ir->align); + break; + case IR_EXPR_BYTE: + case IR_EXPR_SHORT: + case IR_EXPR_LONG: + case IR_EXPR_QUAD: + { + int64_t zero = 0; + int size = 1 << (ir->kind - IR_EXPR_BYTE); + add_section_data(sec, &zero, size); // TODO: Target endian + } + break; + } + } + } +} diff --git a/src/as/arch/aarch64/parse_aarch64.c b/src/as/arch/aarch64/parse_aarch64.c new file mode 100644 index 000000000..5ff3fb97f --- /dev/null +++ b/src/as/arch/aarch64/parse_aarch64.c @@ -0,0 +1,187 @@ +#include "../../../config.h" 
+#include "parse_asm.h" + +#include +#include +#include +#include + +#include "util.h" + +// Align with Opcode. +static const char *kOpTable[] = { + "mov", + "ret", +}; + +static const struct { + const char *name; + enum RegType reg; +} kRegisters[] = { + {"w0", W0}, + {"w1", W1}, + {"w2", W2}, + {"w3", W3}, + {"w4", W4}, + {"w5", W5}, + {"w6", W6}, + {"w7", W7}, + {"w8", W8}, + {"w9", W9}, + {"w10", W10}, + {"w11", W11}, + {"w12", W12}, + {"w13", W13}, + {"w14", W14}, + {"w15", W15}, + {"w16", W16}, + {"w17", W17}, + {"w18", W18}, + {"w19", W19}, + {"w20", W20}, + {"w21", W21}, + {"w22", W22}, + {"w23", W23}, + {"w24", W24}, + {"w25", W25}, + {"w26", W26}, + {"w27", W27}, + {"w28", W28}, + {"w29", W29}, + {"w30", W30}, + {"w31", W31}, + {"x0", X0}, + {"x1", X1}, + {"x2", X2}, + {"x3", X3}, + {"x4", X4}, + {"x5", X5}, + {"x6", X6}, + {"x7", X7}, + {"x8", X8}, + {"x9", X9}, + {"x10", X10}, + {"x11", X11}, + {"x12", X12}, + {"x13", X13}, + {"x14", X14}, + {"x15", X15}, + {"x16", X16}, + {"x17", X17}, + {"x18", X18}, + {"x19", X19}, + {"x20", X20}, + {"x21", X21}, + {"x22", X22}, + {"x23", X23}, + {"x24", X24}, + {"x25", X25}, + {"x26", X26}, + {"x27", X27}, + {"x28", X28}, + {"fp", FP}, + {"lr", LR}, + {"sp", SP}, + // Alias + {"x29", FP}, + {"x30", LR}, + {"x31", SP}, +}; + +inline bool is_reg32(enum RegType reg) { + return reg >= W0 && reg <= W31; +} + +inline bool is_reg64(enum RegType reg) { + return reg >= X0 && reg <= SP; +} + +static int find_match_index(const char **pp, const char **table, size_t count) { + const char *p = *pp; + const char *start = p; + + while (isalnum(*p)) + ++p; + if (*p == '\0' || isspace(*p)) { + size_t n = p - start; + for (size_t i = 0; i < count; ++i) { + const char *name = table[i]; + size_t len = strlen(name); + if (n == len && strncasecmp(start, name, n) == 0) { + *pp = skip_whitespaces(p); + return i; + } + } + } + return -1; +} + +static enum Opcode find_opcode(ParseInfo *info) { + return find_match_index(&info->p, kOpTable, 
ARRAY_SIZE(kOpTable)) + 1; +} + +static enum RegType find_register(const char **pp) { + const char *p = *pp; + for (int i = 0; i < (int)ARRAY_SIZE(kRegisters); ++i) { + const char *name = kRegisters[i].name; + size_t n = strlen(name); + if (strncmp(p, name, n) == 0 && !isdigit(p[n])) { + *pp = p + n; + return kRegisters[i].reg; + } + } + return NOREG; +} + +static bool parse_operand(ParseInfo *info, Operand *operand) { + const char *p = info->p; + if (*p == '#') { + info->p = p + 1; + if (!immediate(&info->p, &operand->immediate)) + parse_error(info, "Syntax error"); + operand->type = IMMEDIATE; + return true; + } + + enum RegType reg = find_register(&info->p); + if (reg != NOREG) { + enum RegSize size; + int no; + if (is_reg32(reg)) { + size = REG32; + no = reg - W0; + } else if (is_reg64(reg)) { + size = REG64; + no = reg - X0; + } else { + parse_error(info, "Illegal register"); + return false; + } + + operand->type = REG; + operand->reg.size = size; + operand->reg.no = no; + return true; + } + + return false; +} + +void parse_inst(ParseInfo *info, Inst *inst) { + Operand *opr_table[] = {&inst->opr1, &inst->opr2, &inst->opr3}; + for (int i = 0; i < (int)ARRAY_SIZE(opr_table); ++i) + opr_table[i]->type = NOOPERAND; + + enum Opcode op = find_opcode(info); + inst->op = op; + if (op != NOOP) { + for (int i = 0; i < (int)ARRAY_SIZE(opr_table); ++i) { + if (!parse_operand(info, opr_table[i])) + break; + info->p = skip_whitespaces(info->p); + if (i == (int)ARRAY_SIZE(opr_table) - 1 || *info->p != ',') + break; + info->p = skip_whitespaces(info->p + 1); + } + } +} diff --git a/src/as/arch/riscv64/asm_code.c b/src/as/arch/riscv64/asm_code.c new file mode 100644 index 000000000..f6ba84e45 --- /dev/null +++ b/src/as/arch/riscv64/asm_code.c @@ -0,0 +1,140 @@ +#include "../../../config.h" +#include "asm_code.h" + +#include +#include // memcpy + +#include "inst.h" +#include "parse_asm.h" +#include "util.h" + +#ifndef MAKE_CODE16 +#define MAKE_CODE16(inst, code, ...) 
do { unsigned short buf[] = {__VA_ARGS__}; make_code16(inst, code, buf, sizeof(buf)); } while (0) +#endif + +#ifndef MAKE_CODE32 +#define MAKE_CODE32(inst, code, ...) do { unsigned int buf[] = {__VA_ARGS__}; make_code32(inst, code, buf, sizeof(buf)); } while (0) +#endif + +void make_code16(Inst *inst, Code *code, unsigned short *buf, int len) { + assert(len <= (int)sizeof(code->buf)); + code->inst = inst; + code->len = len; + memcpy(code->buf, buf, len); +} + +void make_code32(Inst *inst, Code *code, unsigned int *buf, int len) { + assert(len <= (int)sizeof(code->buf)); + code->inst = inst; + code->len = len; + memcpy(code->buf, buf, len); +} + +inline bool is_im6(int64_t x) { + return x <= ((1L << 5) - 1) && x >= -(1L << 5); +} + +inline bool is_im12(int64_t x) { + return x <= ((1L << 11) - 1) && x >= -(1L << 11); +} + +inline bool is_im18(int64_t x) { + return x <= ((1L << 17) - 1) && x >= -(1L << 17); +} + +inline bool assemble_error(const ParseInfo *info, const char *message) { + parse_error(info, message); + return false; +} + +// Encoders. Compressed CI layout: funct3 in bits 15..13, imm[5] in bit 12, rd in bits 11..7, imm[4:0] in bits 6..2, op in bits 1..0. + +#define ZERO 0 + +#define C_LI(rd, imm) MAKE_CODE16(inst, code, 0x4001 | (((imm) & 0x20) << 7) | ((rd) << 7) | (((imm) & 0x1f) << 2)) +#define C_LUI(rd, imm) MAKE_CODE16(inst, code, 0x6001 | (((imm) & 0x20) << 7) | ((rd) << 7) | (((imm) & 0x1f) << 2)) +#define C_ADDIW(rd, imm) MAKE_CODE16(inst, code, 0x2001 | (((imm) & 0x20) << 7) | ((rd) << 7) | (((imm) & 0x1f) << 2)) + +#define ADDIW(rd, rs, imm) MAKE_CODE32(inst, code, 0x0000001b | ((uint32_t)(imm) << 20) | ((rs) << 15) | ((rd) << 7)) +#define ADDI(rd, rs, imm) MAKE_CODE32(inst, code, 0x00000013 | ((uint32_t)(imm) << 20) | ((rs) << 15) | ((rd) << 7)) + +#define LI(rd, imm) ADDI(rd, ZERO, imm) +#define MV(rd, rs) ADDI(rd, rs, 0) + +static unsigned char *asm_noop(Inst *inst, Code *code) { + UNUSED(inst); + unsigned char *p = code->buf; + return p; +} + +static unsigned char *asm_li(Inst *inst, Code *code) { + int rd = inst->opr1.reg.no; + int64_t imm =inst->opr2.immediate; + if (is_im6(imm)) { + C_LI(rd, imm); + } else 
if (is_im12(imm)) { + LI(rd, imm); + } else if (is_im18(imm)) { + // lui then addiw: addiw sign-extends its 12-bit immediate, so round the upper part to compensate. + int h = (imm + 0x800) >> 12, l = imm - h * 4096; + uint32_t lui = 0x00000037 | ((uint32_t)h << 12) | (rd << 7); + uint32_t addiw = 0x0000001b | ((uint32_t)l << 20) | (rd << 15) | (rd << 7); + // Emit both words in a single call: every MAKE_CODE* invocation restarts at code->buf[0]. + MAKE_CODE32(inst, code, lui, addiw); + } else { + // TODO: + return NULL; + } + return code->buf; +} + +static unsigned char *asm_ret(Inst *inst, Code *code) { + MAKE_CODE16(inst, code, 0x8082); + return code->buf; +} + +//////////////////////////////////////////////// + +typedef unsigned char *(*AsmInstFunc)(Inst *inst, Code *code); +typedef struct { + AsmInstFunc func; + enum OperandType opr1_type; + enum OperandType opr2_type; + enum OperandType opr3_type; + int flag; +} AsmInstTable; + +static const AsmInstTable *table[] = { + [NOOP] = (const AsmInstTable[]){ {asm_noop, NOOPERAND, NOOPERAND, NOOPERAND}, {NULL} }, + [LI] = (const AsmInstTable[]){ {asm_li, REG, IMMEDIATE, NOOPERAND}, {NULL} }, + [RET] = (const AsmInstTable[]){ {asm_ret, NOOPERAND, NOOPERAND, NOOPERAND}, {NULL} }, +}; + +void assemble_inst(Inst *inst, const ParseInfo *info, Code *code) { + code->flag = 0; + code->len = 0; + + const AsmInstTable *pt = NULL; + if (inst->op < (enum Opcode)ARRAY_SIZE(table) && table[inst->op] != NULL) { + for (const AsmInstTable *p = table[inst->op]; p->func != NULL; ++p) { + if (inst->opr1.type == p->opr1_type && inst->opr2.type == p->opr2_type && inst->opr3.type == p->opr3_type) { + pt = p; + break; + } + } + } + + if (pt != NULL) { + unsigned char *p = (*pt->func)(inst, code); + if (p != NULL) { + if (p > code->buf) { + code->inst = inst; + code->len = p - code->buf; + assert((size_t)code->len <= sizeof(code->buf)); + } + return; + } + } + + assemble_error(info, "Illegal opeand"); +} diff --git a/src/as/arch/riscv64/inst.h b/src/as/arch/riscv64/inst.h new file mode 100644 index 000000000..531e4d358 --- /dev/null +++ b/src/as/arch/riscv64/inst.h @@ -0,0 +1,46 @@ +// riscv64 Instruction + +#pragma once + +#include // int64_t + +typedef struct Expr Expr; + +// Must match the order with kOpTable in parse_riscv64.c +enum Opcode 
{ + NOOP, + LI, + RET, +}; + +enum RegType { + NOREG, + + X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, + X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X30, X31, +}; + +typedef struct { + char no; // 0~31 +} Reg; + +enum OperandType { + NOOPERAND, + REG, // reg + IMMEDIATE, // 1234 +}; + +typedef struct { + enum OperandType type; + union { + Reg reg; + int64_t immediate; + }; +} Operand; + +typedef struct Inst { + enum Opcode op; + Operand opr1; + Operand opr2; + Operand opr3; +} Inst; diff --git a/src/as/arch/riscv64/ir_asm.c b/src/as/arch/riscv64/ir_asm.c new file mode 100644 index 000000000..de827e24a --- /dev/null +++ b/src/as/arch/riscv64/ir_asm.c @@ -0,0 +1,129 @@ +#include "../../../config.h" +#include "ir_asm.h" + +#include "gen_section.h" +#include "table.h" +#include "util.h" + +static LabelInfo *new_label(int section, uintptr_t address) { + LabelInfo *info = malloc_or_die(sizeof(*info)); + info->section = section; + info->flag = 0; + info->address = address; + info->kind = LK_NONE; + return info; +} + +LabelInfo *add_label_table(Table *label_table, const Name *label, int section, bool define, bool global) { + LabelInfo *info = table_get(label_table, label); + if (info != NULL) { + if (define) { + if ((info->flag & LF_DEFINED) != 0) { + fprintf(stderr, "`%.*s' already defined\n", NAMES(label)); + return NULL; + } + info->address = 1; + info->section = section; + } + } else { + info = new_label(section, 0); + table_put(label_table, label, info); + } + if (define) + info->flag |= LF_DEFINED; + if (global) + info->flag |= LF_GLOBAL; + return info; +} + +IR *new_ir_label(const Name *label) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_LABEL; + ir->label = label; + return ir; +} + +IR *new_ir_code(const Code *code) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_CODE; + ir->code = *code; + return ir; +} + +IR *new_ir_data(const void *data, size_t size) { + IR *ir = malloc_or_die(sizeof(*ir)); 
+ ir->kind = IR_DATA; + ir->data.len = size; + ir->data.buf = (unsigned char*)data; + return ir; +} + +IR *new_ir_bss(size_t size) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_BSS; + ir->bss = size; + return ir; +} + +IR *new_ir_align(int align) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = IR_ALIGN; + ir->align = align; + return ir; +} + +IR *new_ir_expr(enum IrKind kind, const Expr *expr) { + IR *ir = malloc_or_die(sizeof(*ir)); + ir->kind = kind; + ir->expr = expr; + return ir; +} + +bool calc_label_address(uintptr_t start_address, Vector **section_irs, Table *label_table) { + UNUSED(start_address); + UNUSED(section_irs); + UNUSED(label_table); + return true; +} + +bool resolve_relative_address(Vector **section_irs, Table *label_table, Vector *unresolved) { + UNUSED(section_irs); + UNUSED(label_table); + UNUSED(unresolved); + return true; +} + +void emit_irs(Vector **section_irs) { + for (int sec = 0; sec < SECTION_COUNT; ++sec) { + Vector *irs = section_irs[sec]; + for (int i = 0, len = irs->len; i < len; ++i) { + IR *ir = irs->data[i]; + switch (ir->kind) { + case IR_LABEL: + break; + case IR_CODE: + add_code(ir->code.buf, ir->code.len); + break; + case IR_DATA: + add_section_data(sec, ir->data.buf, ir->data.len); + break; + case IR_BSS: + add_bss(ir->bss); + break; + case IR_ALIGN: + align_section_size(sec, ir->align); + break; + case IR_EXPR_BYTE: + case IR_EXPR_SHORT: + case IR_EXPR_LONG: + case IR_EXPR_QUAD: + { + int64_t zero = 0; + int size = 1 << (ir->kind - IR_EXPR_BYTE); + add_section_data(sec, &zero, size); // TODO: Target endian + } + break; + } + } + } +} diff --git a/src/as/arch/riscv64/parse_riscv64.c b/src/as/arch/riscv64/parse_riscv64.c new file mode 100644 index 000000000..e14b3bf6f --- /dev/null +++ b/src/as/arch/riscv64/parse_riscv64.c @@ -0,0 +1,196 @@ +#include "../../../config.h" +#include "parse_asm.h" + +#include +#include +#include +#include + +#include "util.h" + +// Align with Opcode. 
+static const char *kOpTable[] = { + "li", + "ret", +}; + +#define ZEROREG X0 +#define RA X1 +#define SP X2 +#define GP X3 +#define TP X4 +#define T0 X5 +#define T1 X6 +#define T2 X7 +#define FP X8 +#define S1 X9 +#define A0 X10 +#define A1 X11 +#define A2 X12 +#define A3 X13 +#define A4 X14 +#define A5 X15 +#define A6 X16 +#define A7 X17 +#define S2 X18 +#define S3 X19 +#define S4 X20 +#define S5 X21 +#define S6 X22 +#define S7 X23 +#define S8 X24 +#define S9 X25 +#define S10 X26 +#define S11 X27 +#define T3 X28 +#define T4 X29 +#define T5 X30 +#define T6 X31 + +static const struct { + const char *name; + enum RegType reg; +} kRegisters[] = { + {"x0", X0}, + {"x1", X1}, + {"x2", X2}, + {"x3", X3}, + {"x4", X4}, + {"x5", X5}, + {"x6", X6}, + {"x7", X7}, + {"x8", X8}, + {"x9", X9}, + {"x10", X10}, + {"x11", X11}, + {"x12", X12}, + {"x13", X13}, + {"x14", X14}, + {"x15", X15}, + {"x16", X16}, + {"x17", X17}, + {"x18", X18}, + {"x19", X19}, + {"x20", X20}, + {"x21", X21}, + {"x22", X22}, + {"x23", X23}, + {"x24", X24}, + {"x25", X25}, + {"x26", X26}, + {"x27", X27}, + {"x28", X28}, + {"x29", X29}, + {"x30", X30}, + {"x31", X31}, + + // Alias + {"zero", ZEROREG}, + {"ra", RA}, + {"sp", SP}, + {"gp", GP}, + {"tp", TP}, + {"t0", T0}, + {"t1", T1}, + {"t2", T2}, + {"fp", FP}, + {"s1", S1}, + {"a0", A0}, + {"a1", A1}, + {"a2", A2}, + {"a3", A3}, + {"a4", A4}, + {"a5", A5}, + {"a6", A6}, + {"a7", A7}, + {"s2", S2}, + {"s3", S3}, + {"s4", S4}, + {"s5", S5}, + {"s6", S6}, + {"s7", S7}, + {"s8", S8}, + {"s9", S9}, + {"s10", S10}, + {"s11", S11}, + {"t3", T3}, + {"t4", T4}, + {"t5", T5}, + {"t6", T6}, +}; + +static int find_match_index(const char **pp, const char **table, size_t count) { + const char *p = *pp; + const char *start = p; + + while (isalnum(*p)) + ++p; + if (*p == '\0' || isspace(*p)) { + size_t n = p - start; + for (size_t i = 0; i < count; ++i) { + const char *name = table[i]; + size_t len = strlen(name); + if (n == len && strncasecmp(start, name, n) == 0) { + 
*pp = skip_whitespaces(p); + return i; + } + } + } + return -1; +} + +static enum Opcode find_opcode(ParseInfo *info) { + return find_match_index(&info->p, kOpTable, ARRAY_SIZE(kOpTable)) + 1; +} + +static enum RegType find_register(const char **pp) { + const char *p = *pp; + for (int i = 0; i < (int)ARRAY_SIZE(kRegisters); ++i) { + const char *name = kRegisters[i].name; + size_t n = strlen(name); + if (strncmp(p, name, n) == 0 && !isdigit(p[n])) { // a following digit means a longer name: x1 must not match x10, nor s1 match s11 + *pp = p + n; + return kRegisters[i].reg; + } + } + return NOREG; +} + +static bool parse_operand(ParseInfo *info, Operand *operand) { + enum RegType reg = find_register(&info->p); + if (reg != NOREG) { + operand->type = REG; + operand->reg.no = reg - ZEROREG; + return true; + } + + Expr *expr = parse_expr(info); + if (expr != NULL) { + if (expr->kind == EX_FIXNUM) { + operand->type = IMMEDIATE; + operand->immediate = expr->fixnum; + return true; + } + } + + return false; +} + +void parse_inst(ParseInfo *info, Inst *inst) { + Operand *opr_table[] = {&inst->opr1, &inst->opr2, &inst->opr3}; + for (int i = 0; i < (int)ARRAY_SIZE(opr_table); ++i) + opr_table[i]->type = NOOPERAND; + + enum Opcode op = find_opcode(info); + inst->op = op; + if (op != NOOP) { + for (int i = 0; i < (int)ARRAY_SIZE(opr_table); ++i) { + if (!parse_operand(info, opr_table[i])) + break; + info->p = skip_whitespaces(info->p); + if (i == (int)ARRAY_SIZE(opr_table) - 1 || *info->p != ',') + break; + info->p = skip_whitespaces(info->p + 1); + } + } +} diff --git a/src/as/asm_x86.c b/src/as/arch/x64/asm_code.c similarity index 99% rename from src/as/asm_x86.c rename to src/as/arch/x64/asm_code.c index 7c39672cc..3d3de3461 100644 --- a/src/as/asm_x86.c +++ b/src/as/arch/x64/asm_code.c @@ -1,5 +1,5 @@ -#include "../config.h" -#include "asm_x86.h" +#include "../../../config.h" +#include "asm_code.h" #include #include // exit diff --git a/src/as/inst.h b/src/as/arch/x64/inst.h similarity index 62% rename from src/as/inst.h rename to src/as/arch/x64/inst.h index 
8694ee68f..f6b8aeae4 100644 --- a/src/as/inst.h +++ b/src/as/arch/x64/inst.h @@ -1,12 +1,13 @@ -// X86 Instruction +// x64 Instruction #pragma once #include // int64_t +typedef struct Expr Expr; typedef struct Name Name; -// Must match the order with kOpTable in parse_asm.c +// Must match the order with kOpTable in parse_x64.c enum Opcode { NOOP, MOV, @@ -118,101 +119,33 @@ enum RegType { NOREG, // 8bit - AL, - CL, - DL, - BL, + AL, CL, DL, BL, // 8bit (high) - AH, - CH, - DH, - BH, + AH, CH, DH, BH, // 8bit - R8B, - R9B, - R10B, - R11B, - R12B, - R13B, - R14B, - R15B, + R8B, R9B, R10B, R11B, + R12B, R13B, R14B, R15B, // 8bit: corresponds to AH~ in lower 4bit to handle easily. SPL = R15B + 1 + 4, - BPL, - SIL, - DIL, + BPL, SIL, DIL, // 16bit - AX, - CX, - DX, - BX, - SP, - BP, - SI, - DI, - - // 16bit - R8W, - R9W, - R10W, - R11W, - R12W, - R13W, - R14W, - R15W, + AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, // 32bit - EAX, - ECX, - EDX, - EBX, - ESP, - EBP, - ESI, - EDI, - - // 32bit - R8D, - R9D, - R10D, - R11D, - R12D, - R13D, - R14D, - R15D, + EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, // 64bit - RAX, - RCX, - RDX, - RBX, - RSP, - RBP, - RSI, - RDI, - - // 64bit - R8, - R9, - R10, - R11, - R12, - R13, - R14, - R15, - + RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15, RIP, // Segment register - CS, - DS, - ES, - FS, - GS, - SS, + CS, DS, ES, FS, GS, SS, }; enum RegXmmType { @@ -248,45 +181,11 @@ enum OperandType { SEGMENT_OFFSET, }; -enum ExprKind { - EX_LABEL, - EX_FIXNUM, - EX_POS, - EX_NEG, - EX_ADD, - EX_SUB, - EX_MUL, - EX_DIV, - EX_FLONUM, -}; - -#ifndef __NO_FLONUM -typedef long double Flonum; -#endif - -typedef struct Expr { - enum ExprKind kind; - union { - const Name *label; - int64_t fixnum; - struct { - struct Expr *lhs; - struct Expr *rhs; - } bop; - struct { - struct Expr *sub; - } unary; -#ifndef __NO_FLONUM - Flonum flonum; -#endif - }; 
-} Expr; - typedef struct { enum OperandType type; union { Reg reg; - long immediate; + int64_t immediate; struct { Expr *expr; } direct; @@ -313,24 +212,3 @@ typedef struct Inst { Operand src; Operand dst; } Inst; - -enum DirectiveType { - NODIRECTIVE, - DT_ASCII, - DT_SECTION, - DT_TEXT, - DT_DATA, - DT_ALIGN, - DT_P2ALIGN, - DT_TYPE, - DT_BYTE, - DT_SHORT, - DT_LONG, - DT_QUAD, - DT_COMM, - DT_GLOBL, - DT_LOCAL, - DT_EXTERN, - DT_FLOAT, - DT_DOUBLE, -}; diff --git a/src/as/ir_asm.c b/src/as/arch/x64/ir_asm.c similarity index 84% rename from src/as/ir_asm.c rename to src/as/arch/x64/ir_asm.c index 4be16f924..55ec3557e 100644 --- a/src/as/ir_asm.c +++ b/src/as/arch/x64/ir_asm.c @@ -1,4 +1,4 @@ -#include "../config.h" +#include "../../../config.h" #include "ir_asm.h" #include @@ -7,6 +7,7 @@ #include "gen_section.h" #include "inst.h" +#include "parse_asm.h" #include "table.h" #include "util.h" @@ -160,77 +161,6 @@ static void put_value(unsigned char *p, intptr_t value, int size) { } } -typedef struct { - const Name *label; - int64_t offset; -} Value; - -static Value calc_expr(Table *label_table, const Expr *expr) { - assert(expr != NULL); - switch (expr->kind) { - case EX_LABEL: - return (Value){.label = expr->label, .offset = 0}; - case EX_FIXNUM: - return (Value){.label = NULL, .offset = expr->fixnum}; - case EX_ADD: - case EX_SUB: - case EX_MUL: - case EX_DIV: - { - Value lhs = calc_expr(label_table, expr->bop.lhs); - Value rhs = calc_expr(label_table, expr->bop.rhs); - if (rhs.label != NULL) { - if (expr->kind == EX_SUB && lhs.label != NULL) { - LabelInfo *llabel, *rlabel; - if (table_try_get(label_table, lhs.label, (void**)&llabel) && - table_try_get(label_table, rhs.label, (void**)&rlabel)) { - return (Value){.label = NULL, .offset = llabel->address - rlabel->address}; - } else { - error("Unresolved"); - } - } - if (expr->kind != EX_ADD || lhs.label != NULL) { - error("Illegal expression"); - } - // offset + label - return (Value){.label = rhs.label, .offset 
= lhs.offset + rhs.offset}; - } - if (lhs.label != NULL) { - if (expr->kind != EX_ADD) { - error("Illegal expression"); - } - // label + offset - return (Value){.label = lhs.label, .offset = lhs.offset + rhs.offset}; - } - - assert(lhs.label == NULL && rhs.label == NULL); - switch (expr->kind) { - case EX_ADD: lhs.offset += rhs.offset; break; - case EX_SUB: lhs.offset -= rhs.offset; break; - case EX_MUL: lhs.offset *= rhs.offset; break; - case EX_DIV: lhs.offset /= rhs.offset; break; - default: assert(false); break; - } - return lhs; - } - - case EX_POS: - case EX_NEG: - { - Value value = calc_expr(label_table, expr->unary.sub); - if (value.label != NULL) { - error("Illegal expression"); - } - if (expr->kind == EX_NEG) - value.offset = -value.offset; - return value; - } - - default: assert(false); break; - } - return (Value){.label = NULL, .offset = 0}; -} - static bool make_jmp_long(IR *ir) { if (ir->code.flag & INST_LONG_OFFSET) return false; diff --git a/src/as/arch/x64/parse_x64.c b/src/as/arch/x64/parse_x64.c new file mode 100644 index 000000000..b0f00aa54 --- /dev/null +++ b/src/as/arch/x64/parse_x64.c @@ -0,0 +1,434 @@ +#include "../../../config.h" +#include "parse_asm.h" + +#include +#include +#include +#include +#include + +#include "util.h" + +// Align with Opcode. 
+static const char *kOpTable[] = { + "mov", + "movb", "movw", "movl", "movq", + "movsx", "movzx", + "lea", + + "add", "addq", + "sub", "subq", + "mul", + "div", "idiv", + "neg", + "not", + "inc", "incb", "incw", "incl", "incq", + "dec", "decb", "decw", "decl", "decq", + "and", + "or", + "xor", + "shl", + "shr", + "sar", + "cmp", + "test", + "cwtl", "cltd", "cqto", + + "seto", "setno", "setb", "setae", "sete", "setne", "setbe", "seta", + "sets", "setns", "setp", "setnp", "setl", "setge", "setle", "setg", + + "jmp", + "jo", "jno", "jb", "jae", "je", "jne", "jbe", "ja", + "js", "jns", "jp", "jnp", "jl", "jge", "jle", "jg", + "call", "ret", + "push", "pop", + + "int", + "syscall", + + "movsd", + "addsd", + "subsd", + "mulsd", + "divsd", + "ucomisd", + "cvtsi2sd", "cvttsd2si", + "sqrtsd", + + "movss", + "addss", + "subss", + "mulss", + "divss", + "ucomiss", + "cvtsi2ss", "cvttss2si", + "cvtsd2ss", "cvtss2sd", +}; + +static const struct { + const char *name; + enum RegType reg; +} kRegisters[] = { + // 8bit + {"al", AL}, {"cl", CL}, {"dl", DL}, {"bl", BL}, + {"ah", AH}, {"ch", CH}, {"dh", DH}, {"bh", BH}, + {"r8b", R8B}, {"r9b", R9B}, {"r10b", R10B}, {"r11b", R11B}, + {"r12b", R12B}, {"r13b", R13B}, {"r14b", R14B}, {"r15b", R15B}, + {"spl", SPL}, {"bpl", BPL}, {"sil", SIL}, {"dil", DIL}, + + // 16bit + {"ax", AX}, {"cx", CX}, {"dx", DX}, {"bx", BX}, + {"sp", SP}, {"bp", BP}, {"si", SI}, {"di", DI}, + {"r8w", R8W}, {"r9w", R9W}, {"r10w", R10W}, {"r11w", R11W}, + {"r12w", R12W}, {"r13w", R13W}, {"r14w", R14W}, {"r15w", R15W}, + + // 32bit + {"eax", EAX}, {"ecx", ECX}, {"edx", EDX}, {"ebx", EBX}, + {"esp", ESP}, {"ebp", EBP}, {"esi", ESI}, {"edi", EDI}, + {"r8d", R8D}, {"r9d", R9D}, {"r10d", R10D}, {"r11d", R11D}, + {"r12d", R12D}, {"r13d", R13D}, {"r14d", R14D}, {"r15d", R15D}, + + // 64bit + {"rax", RAX}, {"rcx", RCX}, {"rdx", RDX}, {"rbx", RBX}, + {"rsp", RSP}, {"rbp", RBP}, {"rsi", RSI}, {"rdi", RDI}, + {"r8", R8}, {"r9", R9}, {"r10", R10}, {"r11", R11}, + {"r12", R12}, 
{"r13", R13}, {"r14", R14}, {"r15", R15}, + {"rip", RIP}, + + // Segment register + {"cs", CS}, {"ds", DS}, {"es", ES}, {"fs", FS}, {"gs", GS}, {"ss", SS}, +}; + +static const char kXmmRegisters[][6] = { + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", +}; + +inline bool is_reg8(enum RegType reg) { + return reg >= AL && reg <= DIL; +} + +inline bool is_reg16(enum RegType reg) { + return reg >= AX && reg <= R15W; +} + +inline bool is_reg32(enum RegType reg) { + return reg >= EAX && reg <= R15D; +} + +inline bool is_reg64(enum RegType reg) { + return reg >= RAX && reg <= R15; +} + +inline bool is_segment(enum RegType reg) { + return reg >= CS && reg <= SS; +} + +static int find_match_index(const char **pp, const char **table, size_t count) { + const char *p = *pp; + const char *start = p; + + while (isalnum(*p)) + ++p; + if (*p == '\0' || isspace(*p)) { + size_t n = p - start; + for (size_t i = 0; i < count; ++i) { + const char *name = table[i]; + size_t len = strlen(name); + if (n == len && strncasecmp(start, name, n) == 0) { + *pp = skip_whitespaces(p); + return i; + } + } + } + return -1; +} + +static enum Opcode find_opcode(ParseInfo *info) { + return find_match_index(&info->p, kOpTable, ARRAY_SIZE(kOpTable)) + 1; +} + +static enum RegType find_register(const char **pp) { + const char *p = *pp; + for (int i = 0; i < (int)ARRAY_SIZE(kRegisters); ++i) { + const char *name = kRegisters[i].name; + size_t n = strlen(name); + if (strncmp(p, name, n) == 0) { + *pp = p + n; + return kRegisters[i].reg; + } + } + return NOREG; +} + +static enum RegXmmType find_xmm_register(const char **pp) { + const char *p = *pp; + const char *q; + for (q = p; isalnum(*q); ++q) + ; + size_t l = q - p; + + for (int i = 0; i < (int)ARRAY_SIZE(kXmmRegisters); ++i) { + const char *name = kXmmRegisters[i]; + size_t n = strlen(name); + if (l == n && strncmp(p, name, n) == 0) { + *pp = p + n; + return i + 
XMM0; + } + } + return NOREGXMM; +} + +static enum RegType parse_direct_register(ParseInfo *info, Operand *operand) { + { + enum RegXmmType regxmm = find_xmm_register(&info->p); + if (regxmm != NOREGXMM) { + operand->type = REG_XMM; + operand->regxmm = regxmm; + return true; + } + } + + enum RegType reg = find_register(&info->p); + if (is_segment(reg)) { + Expr *offset = NULL; + if (*info->p == ':') { + ++info->p; + offset = parse_expr(info); + } + operand->type = SEGMENT_OFFSET; + operand->segment.reg = reg; + operand->segment.offset = offset; + return true; + } + + enum RegSize size; + int no; + if (is_reg8(reg)) { + size = REG8; + no = reg - AL; + } else if (is_reg16(reg)) { + size = REG16; + no = reg - AX; + } else if (is_reg32(reg)) { + size = REG32; + no = reg - EAX; + } else if (is_reg64(reg)) { + size = REG64; + no = reg - RAX; + } else { + parse_error(info, "Illegal register"); + return false; + } + + operand->type = REG; + operand->reg.size = size; + operand->reg.no = no & 7; + operand->reg.x = no >> 3; + return true; +} + +static bool parse_indirect_register(ParseInfo *info, Expr *offset, Operand *operand) { + enum RegType index_reg = NOREG; + Expr *scale = NULL; + // Already read "(%". 
+ enum RegType base_reg = find_register(&info->p); + + info->p = skip_whitespaces(info->p); + if (*info->p == ',') { + info->p = skip_whitespaces(info->p + 1); + if (*info->p != '%' || + (++info->p, index_reg = find_register(&info->p), !is_reg64(index_reg))) + parse_error(info, "Register expected"); + info->p = skip_whitespaces(info->p); + if (*info->p == ',') { + info->p = skip_whitespaces(info->p + 1); + scale = parse_expr(info); + if (scale->kind != EX_FIXNUM) + parse_error(info, "constant value expected"); + info->p = skip_whitespaces(info->p); + } + } + if (*info->p != ')') + parse_error(info, "`)' expected"); + else + ++info->p; + + if (!(is_reg64(base_reg) || (base_reg == RIP && index_reg == NOREG))) + parse_error(info, "Register expected"); + + if (index_reg == NOREG) { + char no = base_reg - RAX; + operand->type = INDIRECT; + operand->indirect.reg.size = REG64; + operand->indirect.reg.no = base_reg != RIP ? no & 7 : RIP; + operand->indirect.reg.x = (no & 8) >> 3; + operand->indirect.offset = offset; + } else { + if (!is_reg64(index_reg)) + parse_error(info, "Register expected"); + + operand->type = INDIRECT_WITH_INDEX; + operand->indirect_with_index.offset = offset; + operand->indirect_with_index.scale = scale; + char base_no = base_reg - RAX; + operand->indirect_with_index.base_reg.size = REG64; + operand->indirect_with_index.base_reg.no = base_no & 7; + operand->indirect_with_index.base_reg.x = (base_no & 8) >> 3; + char index_no = index_reg - RAX; + operand->indirect_with_index.index_reg.size = REG64; + operand->indirect_with_index.index_reg.no = index_no & 7; + operand->indirect_with_index.index_reg.x = (index_no & 8) >> 3; + } + + return true; +} + +static enum RegType parse_deref_register(ParseInfo *info, Operand *operand) { + enum RegType reg = find_register(&info->p); + if (!is_reg64(reg)) + parse_error(info, "Illegal register"); + + char no = reg - RAX; + operand->type = DEREF_REG; + operand->reg.size = REG64; + operand->reg.no = no & 7; + 
operand->reg.x = (no & 8) >> 3; + return true; +} + +static bool parse_deref_indirect(ParseInfo *info, Operand *operand) { + Expr *offset = parse_expr(info); + info->p = skip_whitespaces(info->p); + if (*info->p != '(') { + parse_error(info, "direct number not implemented"); + return false; + } + if (info->p[1] != '%') { + parse_error(info, "Register expected"); + return false; + } + info->p += 2; + + enum RegType index_reg = NOREG; + Expr *scale = NULL; + // Already read "(%". + enum RegType base_reg = find_register(&info->p); + + info->p = skip_whitespaces(info->p); + if (*info->p == ',') { + info->p = skip_whitespaces(info->p + 1); + if (*info->p != '%' || + (++info->p, index_reg = find_register(&info->p), !is_reg64(index_reg))) + parse_error(info, "Register expected"); + info->p = skip_whitespaces(info->p); + if (*info->p == ',') { + info->p = skip_whitespaces(info->p + 1); + scale = parse_expr(info); + if (scale->kind != EX_FIXNUM) + parse_error(info, "constant value expected"); + info->p = skip_whitespaces(info->p); + } + } + if (*info->p != ')') + parse_error(info, "`)' expected"); + else + ++info->p; + + if (!is_reg64(base_reg) || (index_reg != NOREG && !is_reg64(index_reg))) + parse_error(info, "Register expected"); + + if (index_reg == NOREG) { + operand->type = DEREF_INDIRECT; + operand->indirect.offset = offset; + char reg_no = base_reg - RAX; + operand->indirect.reg.size = REG64; + operand->indirect.reg.no = reg_no & 7; + operand->indirect.reg.x = (reg_no & 8) >> 3; + } else { + operand->type = DEREF_INDIRECT_WITH_INDEX; + operand->indirect_with_index.offset = offset; + operand->indirect_with_index.scale = scale; + char base_no = base_reg - RAX; + operand->indirect_with_index.base_reg.size = REG64; + operand->indirect_with_index.base_reg.no = base_no & 7; + operand->indirect_with_index.base_reg.x = (base_no & 8) >> 3; + operand->indirect_with_index.index_reg.size = REG64; + char index_no = index_reg - RAX; + operand->indirect_with_index.index_reg.size 
= REG64; + operand->indirect_with_index.index_reg.no = index_no & 7; + operand->indirect_with_index.index_reg.x = (index_no & 8) >> 3; + } + + return true; +} + +static bool parse_operand(ParseInfo *info, Operand *operand) { + const char *p = info->p; + if (*p == '%') { + info->p = p + 1; + return parse_direct_register(info, operand); + } + + if (*p == '*') { + if (p[1] == '%') { + info->p = p + 2; + return parse_deref_register(info, operand); + } else { + info->p = p + 1; + return parse_deref_indirect(info, operand); + } + } + + if (*p == '$') { + info->p = p + 1; + if (!immediate(&info->p, &operand->immediate)) + parse_error(info, "Syntax error"); + operand->type = IMMEDIATE; + return true; + } + + Expr *expr = parse_expr(info); + info->p = skip_whitespaces(info->p); + if (*info->p != '(') { + if (expr != NULL) { + if (expr->kind == EX_LABEL || expr->kind == EX_FIXNUM) { + operand->type = DIRECT; + operand->direct.expr = expr; + return true; + } + parse_error(info, "direct number not implemented"); + } + } else { + if (info->p[1] == '%') { + info->p += 2; + if (expr == NULL) { + expr = malloc_or_die(sizeof(*expr)); + expr->kind = EX_FIXNUM; + expr->fixnum = 0; + } + return parse_indirect_register(info, expr, operand); + } + } + + return false; +} + +void parse_inst(ParseInfo *info, Inst *inst) { + Operand *opr_table[] = {&inst->src, &inst->dst}; + for (int i = 0; i < (int)ARRAY_SIZE(opr_table); ++i) + opr_table[i]->type = NOOPERAND; + + enum Opcode op = find_opcode(info); + inst->op = op; + if (op != NOOP) { + if (parse_operand(info, &inst->src)) { + info->p = skip_whitespaces(info->p); + if (*info->p == ',') { + info->p = skip_whitespaces(info->p + 1); + parse_operand(info, &inst->dst); + info->p = skip_whitespaces(info->p); + } + } + } +} diff --git a/src/as/as.c b/src/as/as.c index e19ca0ad8..6fc6ae376 100644 --- a/src/as/as.c +++ b/src/as/as.c @@ -9,7 +9,7 @@ #include #include -#include "asm_x86.h" +#include "asm_code.h" #include "elfutil.h" #include 
"gen_section.h" #include "ir_asm.h" @@ -130,7 +130,12 @@ static int output_obj(const char *ofn, Table *label_table, Vector *unresolved) { uintptr_t entry = 0; int phnum = 0; int shnum = 11; - out_elf_header(ofp, entry, phnum, shnum); +#if XCC_TARGET_ARCH == XCC_ARCH_RISCV64 + const int flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE; +#else + const int flags = 0; +#endif + out_elf_header(ofp, entry, phnum, shnum, flags); uintptr_t addr = sizeof(Elf64_Ehdr); uintptr_t code_ofs = addr; @@ -398,7 +403,7 @@ static int output_obj(const char *ofn, Table *label_table, Vector *unresolved) { fwrite(&shstrtabsec, sizeof(shstrtabsec), 1, ofp); // Write section table offset. - fseek(ofp, 0x28, SEEK_SET); + fseek(ofp, offsetof(Elf64_Ehdr, e_shoff), SEEK_SET); putnum(ofp, sh_ofs, 8); } diff --git a/src/as/asm_x86.h b/src/as/asm_code.h similarity index 95% rename from src/as/asm_x86.h rename to src/as/asm_code.h index 444b0313c..790ed1e12 100644 --- a/src/as/asm_x86.h +++ b/src/as/asm_code.h @@ -1,4 +1,4 @@ -// Generate code for X86 +// Generate code for each architecture. #pragma once diff --git a/src/as/ir_asm.h b/src/as/ir_asm.h index 97a89892b..8473304c1 100644 --- a/src/as/ir_asm.h +++ b/src/as/ir_asm.h @@ -5,7 +5,7 @@ #include // size_t #include // uintptr_t -#include "asm_x86.h" // Code +#include "asm_code.h" // Code typedef struct Expr Expr; typedef struct Name Name; diff --git a/src/as/parse_asm.c b/src/as/parse_asm.c index e4b887813..e507a4128 100644 --- a/src/as/parse_asm.c +++ b/src/as/parse_asm.c @@ -3,7 +3,6 @@ #include #include -#include #include #include // strtoul #include @@ -14,210 +13,8 @@ #include "table.h" #include "util.h" -static Expr *parse_expr(ParseInfo *info); - -// Align with Opcode. 
-static const char *kOpTable[] = { - "mov", - "movb", - "movw", - "movl", - "movq", - "movsx", - "movzx", - "lea", - - "add", - "addq", - "sub", - "subq", - "mul", - "div", - "idiv", - "neg", - "not", - "inc", - "incb", - "incw", - "incl", - "incq", - "dec", - "decb", - "decw", - "decl", - "decq", - "and", - "or", - "xor", - "shl", - "shr", - "sar", - "cmp", - "test", - "cwtl", - "cltd", - "cqto", - - "seto", - "setno", - "setb", - "setae", - "sete", - "setne", - "setbe", - "seta", - "sets", - "setns", - "setp", - "setnp", - "setl", - "setge", - "setle", - "setg", - - "jmp", - "jo", - "jno", - "jb", - "jae", - "je", - "jne", - "jbe", - "ja", - "js", - "jns", - "jp", - "jnp", - "jl", - "jge", - "jle", - "jg", - "call", - "ret", - "push", - "pop", - - "int", - "syscall", - - "movsd", - "addsd", - "subsd", - "mulsd", - "divsd", - "ucomisd", - "cvtsi2sd", - "cvttsd2si", - "sqrtsd", - - "movss", - "addss", - "subss", - "mulss", - "divss", - "ucomiss", - "cvtsi2ss", - "cvttss2si", - - "cvtsd2ss", - "cvtss2sd", -}; - -static const struct { - const char *name; - enum RegType reg; -} kRegisters[] = { - {"al", AL}, - {"cl", CL}, - {"dl", DL}, - {"bl", BL}, - {"ah", AH}, - {"ch", CH}, - {"dh", DH}, - {"bh", BH}, - - {"r8b", R8B}, - {"r9b", R9B}, - {"r10b", R10B}, - {"r11b", R11B}, - {"r12b", R12B}, - {"r13b", R13B}, - {"r14b", R14B}, - {"r15b", R15B}, - - {"spl", SPL}, - {"bpl", BPL}, - {"sil", SIL}, - {"dil", DIL}, - - {"ax", AX}, - {"cx", CX}, - {"dx", DX}, - {"bx", BX}, - {"sp", SP}, - {"bp", BP}, - {"si", SI}, - {"di", DI}, - - {"r8w", R8W}, - {"r9w", R9W}, - {"r10w", R10W}, - {"r11w", R11W}, - {"r12w", R12W}, - {"r13w", R13W}, - {"r14w", R14W}, - {"r15w", R15W}, - - {"eax", EAX}, - {"ecx", ECX}, - {"edx", EDX}, - {"ebx", EBX}, - {"esp", ESP}, - {"ebp", EBP}, - {"esi", ESI}, - {"edi", EDI}, - - {"r8d", R8D}, - {"r9d", R9D}, - {"r10d", R10D}, - {"r11d", R11D}, - {"r12d", R12D}, - {"r13d", R13D}, - {"r14d", R14D}, - {"r15d", R15D}, - - {"rax", RAX}, - {"rcx", RCX}, - 
{"rdx", RDX}, - {"rbx", RBX}, - {"rsp", RSP}, - {"rbp", RBP}, - {"rsi", RSI}, - {"rdi", RDI}, - - {"r8", R8}, - {"r9", R9}, - {"r10", R10}, - {"r11", R11}, - {"r12", R12}, - {"r13", R13}, - {"r14", R14}, - {"r15", R15}, - - {"rip", RIP}, - - {"cs", CS}, - {"ds", DS}, - {"es", ES}, - {"fs", FS}, - {"gs", GS}, - {"ss", SS}, -}; - -static const char kXmmRegisters[][6] = { - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", -}; +bool err; +int current_section = SEC_CODE; static const char *kDirectiveTable[] = { "ascii", @@ -241,58 +38,12 @@ static const char *kDirectiveTable[] = { #endif }; -bool err; - void parse_error(const ParseInfo *info, const char *message) { fprintf(stderr, "%s(%d): %s\n", info->filename, info->lineno, message); fprintf(stderr, "%s\n", info->rawline); err = true; } -inline bool is_reg8(enum RegType reg) { - return reg >= AL && reg <= DIL; -} - -inline bool is_reg16(enum RegType reg) { - return reg >= AX && reg <= R15W; -} - -inline bool is_reg32(enum RegType reg) { - return reg >= EAX && reg <= R15D; -} - -inline bool is_reg64(enum RegType reg) { - return reg >= RAX && reg <= R15; -} - -inline bool is_segment(enum RegType reg) { - return reg >= CS && reg <= SS; -} - -static int find_match_index(const char **pp, const char **table, size_t count) { - const char *p = *pp; - const char *start = p; - - while (isalnum(*p)) - ++p; - if (*p == '\0' || isspace(*p)) { - size_t n = p - start; - for (size_t i = 0; i < count; ++i) { - const char *name = table[i]; - size_t len = strlen(name); - if (n == len && strncasecmp(start, name, n) == 0) { - *pp = skip_whitespaces(p); - return i; - } - } - } - return -1; -} - -static enum Opcode find_opcode(ParseInfo *info) { - return find_match_index(&info->p, kOpTable, ARRAY_SIZE(kOpTable)) + 1; -} - static enum DirectiveType find_directive(const char *p, size_t n) { const char **table = kDirectiveTable; for (size_t i = 0; i < 
ARRAY_SIZE(kDirectiveTable); ++i) { @@ -304,38 +55,7 @@ static enum DirectiveType find_directive(const char *p, size_t n) { return NODIRECTIVE; } -static enum RegType find_register(const char **pp) { - const char *p = *pp; - for (int i = 0; i < (int)ARRAY_SIZE(kRegisters); ++i) { - const char *name = kRegisters[i].name; - size_t n = strlen(name); - if (strncmp(p, name, n) == 0) { - *pp = p + n; - return kRegisters[i].reg; - } - } - return NOREG; -} - -static enum RegXmmType find_xmm_register(const char **pp) { - const char *p = *pp; - const char *q; - for (q = p; isalnum(*q); ++q) - ; - size_t l = q - p; - - for (int i = 0; i < (int)ARRAY_SIZE(kXmmRegisters); ++i) { - const char *name = kXmmRegisters[i]; - size_t n = strlen(name); - if (l == n && strncmp(p, name, n) == 0) { - *pp = p + n; - return i + XMM0; - } - } - return NOREGXMM; -} - -static bool immediate(const char **pp, long *value) { +bool immediate(const char **pp, int64_t *value) { const char *p = *pp; bool negative = false; if (*p == '-') { @@ -376,7 +96,7 @@ inline bool is_label_chr(char c) { return is_label_first_chr(c) || isdigit(c); } -static const char *skip_until_delimiter(const char *p) { +const char *skip_until_delimiter(const char *p) { if (*p == '"') { ++p; for (char c; c = *p, c != '\0'; ++p) { @@ -394,7 +114,7 @@ static const char *skip_until_delimiter(const char *p) { return p; } -static const Name *unquote_label(const char *p, const char *q) { +const Name *unquote_label(const char *p, const char *q) { if (*p != '"') return alloc_name(p, q, false); if (q[-1] != '"' || q == p + 2) @@ -454,190 +174,6 @@ static const Name *parse_section_name(ParseInfo *info) { return alloc_name(start, p, false); } -static enum RegType parse_direct_register(ParseInfo *info, Operand *operand) { - { - enum RegXmmType regxmm = find_xmm_register(&info->p); - if (regxmm != NOREGXMM) { - operand->type = REG_XMM; - operand->regxmm = regxmm; - return true; - } - } - - enum RegType reg = find_register(&info->p); - if 
(is_segment(reg)) { - Expr *offset = NULL; - if (*info->p == ':') { - ++info->p; - offset = parse_expr(info); - } - operand->type = SEGMENT_OFFSET; - operand->segment.reg = reg; - operand->segment.offset = offset; - return true; - } - - enum RegSize size; - int no; - if (is_reg8(reg)) { - size = REG8; - no = reg - AL; - } else if (is_reg16(reg)) { - size = REG16; - no = reg - AX; - } else if (is_reg32(reg)) { - size = REG32; - no = reg - EAX; - } else if (is_reg64(reg)) { - size = REG64; - no = reg - RAX; - } else { - parse_error(info, "Illegal register"); - return false; - } - - operand->type = REG; - operand->reg.size = size; - operand->reg.no = no & 7; - operand->reg.x = no >> 3; - return true; -} - -static bool parse_indirect_register(ParseInfo *info, Expr *offset, Operand *operand) { - enum RegType index_reg = NOREG; - Expr *scale = NULL; - // Already read "(%". - enum RegType base_reg = find_register(&info->p); - - info->p = skip_whitespaces(info->p); - if (*info->p == ',') { - info->p = skip_whitespaces(info->p + 1); - if (*info->p != '%' || - (++info->p, index_reg = find_register(&info->p), !is_reg64(index_reg))) - parse_error(info, "Register expected"); - info->p = skip_whitespaces(info->p); - if (*info->p == ',') { - info->p = skip_whitespaces(info->p + 1); - scale = parse_expr(info); - if (scale->kind != EX_FIXNUM) - parse_error(info, "constant value expected"); - info->p = skip_whitespaces(info->p); - } - } - if (*info->p != ')') - parse_error(info, "`)' expected"); - else - ++info->p; - - if (!(is_reg64(base_reg) || (base_reg == RIP && index_reg == NOREG))) - parse_error(info, "Register expected"); - - if (index_reg == NOREG) { - char no = base_reg - RAX; - operand->type = INDIRECT; - operand->indirect.reg.size = REG64; - operand->indirect.reg.no = base_reg != RIP ? 
no & 7 : RIP; - operand->indirect.reg.x = (no & 8) >> 3; - operand->indirect.offset = offset; - } else { - if (!is_reg64(index_reg)) - parse_error(info, "Register expected"); - - operand->type = INDIRECT_WITH_INDEX; - operand->indirect_with_index.offset = offset; - operand->indirect_with_index.scale = scale; - char base_no = base_reg - RAX; - operand->indirect_with_index.base_reg.size = REG64; - operand->indirect_with_index.base_reg.no = base_no & 7; - operand->indirect_with_index.base_reg.x = (base_no & 8) >> 3; - char index_no = index_reg - RAX; - operand->indirect_with_index.index_reg.size = REG64; - operand->indirect_with_index.index_reg.no = index_no & 7; - operand->indirect_with_index.index_reg.x = (index_no & 8) >> 3; - } - - return true; -} - -static enum RegType parse_deref_register(ParseInfo *info, Operand *operand) { - enum RegType reg = find_register(&info->p); - if (!is_reg64(reg)) - parse_error(info, "Illegal register"); - - char no = reg - RAX; - operand->type = DEREF_REG; - operand->reg.size = REG64; - operand->reg.no = no & 7; - operand->reg.x = (no & 8) >> 3; - return true; -} - -static bool parse_deref_indirect(ParseInfo *info, Operand *operand) { - Expr *offset = parse_expr(info); - info->p = skip_whitespaces(info->p); - if (*info->p != '(') { - parse_error(info, "direct number not implemented"); - return false; - } - if (info->p[1] != '%') { - parse_error(info, "Register expected"); - return false; - } - info->p += 2; - - enum RegType index_reg = NOREG; - Expr *scale = NULL; - // Already read "(%". 
- enum RegType base_reg = find_register(&info->p); - - info->p = skip_whitespaces(info->p); - if (*info->p == ',') { - info->p = skip_whitespaces(info->p + 1); - if (*info->p != '%' || - (++info->p, index_reg = find_register(&info->p), !is_reg64(index_reg))) - parse_error(info, "Register expected"); - info->p = skip_whitespaces(info->p); - if (*info->p == ',') { - info->p = skip_whitespaces(info->p + 1); - scale = parse_expr(info); - if (scale->kind != EX_FIXNUM) - parse_error(info, "constant value expected"); - info->p = skip_whitespaces(info->p); - } - } - if (*info->p != ')') - parse_error(info, "`)' expected"); - else - ++info->p; - - if (!is_reg64(base_reg) || (index_reg != NOREG && !is_reg64(index_reg))) - parse_error(info, "Register expected"); - - if (index_reg == NOREG) { - operand->type = DEREF_INDIRECT; - operand->indirect.offset = offset; - char reg_no = base_reg - RAX; - operand->indirect.reg.size = REG64; - operand->indirect.reg.no = reg_no & 7; - operand->indirect.reg.x = (reg_no & 8) >> 3; - } else { - operand->type = DEREF_INDIRECT_WITH_INDEX; - operand->indirect_with_index.offset = offset; - operand->indirect_with_index.scale = scale; - char base_no = base_reg - RAX; - operand->indirect_with_index.base_reg.size = REG64; - operand->indirect_with_index.base_reg.no = base_no & 7; - operand->indirect_with_index.base_reg.x = (base_no & 8) >> 3; - operand->indirect_with_index.index_reg.size = REG64; - char index_no = index_reg - RAX; - operand->indirect_with_index.index_reg.size = REG64; - operand->indirect_with_index.index_reg.no = index_no & 7; - operand->indirect_with_index.index_reg.x = (index_no & 8) >> 3; - } - - return true; -} - enum TokenKind { TK_UNKNOWN, TK_LABEL, @@ -897,85 +433,16 @@ static Expr *parse_add(ParseInfo *info) { return expr; } -static Expr *parse_expr(ParseInfo *info) { +Expr *parse_expr(ParseInfo *info) { info->token = NULL; info->next = NULL; return parse_add(info); } -static bool parse_operand(ParseInfo *info, Operand 
*operand) { - const char *p = info->p; - if (*p == '%') { - info->p = p + 1; - return parse_direct_register(info, operand); - } - - if (*p == '*') { - if (p[1] == '%') { - info->p = p + 2; - return parse_deref_register(info, operand); - } else { - info->p = p + 1; - return parse_deref_indirect(info, operand); - } - } - - if (*p == '$') { - info->p = p + 1; - if (!immediate(&info->p, &operand->immediate)) - parse_error(info, "Syntax error"); - operand->type = IMMEDIATE; - return true; - } - - Expr *expr = parse_expr(info); - info->p = skip_whitespaces(info->p); - if (*info->p != '(') { - if (expr != NULL) { - if (expr->kind == EX_LABEL || expr->kind == EX_FIXNUM) { - operand->type = DIRECT; - operand->direct.expr = expr; - return true; - } - parse_error(info, "direct number not implemented"); - } - } else { - if (info->p[1] == '%') { - info->p += 2; - if (expr == NULL) { - expr = malloc_or_die(sizeof(*expr)); - expr->kind = EX_FIXNUM; - expr->fixnum = 0; - } - return parse_indirect_register(info, expr, operand); - } - } - - return false; -} - -static void parse_inst(ParseInfo *info, Inst *inst) { - enum Opcode op = find_opcode(info); - inst->op = op; - if (op != NOOP) { - if (parse_operand(info, &inst->src)) { - info->p = skip_whitespaces(info->p); - if (*info->p == ',') { - info->p = skip_whitespaces(info->p + 1); - parse_operand(info, &inst->dst); - info->p = skip_whitespaces(info->p); - } - } - } -} - -int current_section = SEC_CODE; - Line *parse_line(ParseInfo *info) { - Line *line = malloc_or_die(sizeof(*line)); + Line *line = calloc_or_die(sizeof(*line)); line->label = NULL; line->inst.op = NOOP; - line->inst.src.type = line->inst.dst.type = NOOPERAND; line->dir = NODIRECTIVE; const char *p = skip_whitespaces(info->rawline); @@ -1093,13 +560,13 @@ void handle_directive(ParseInfo *info, enum DirectiveType dir, Vector **section_ if (*info->p != ',') parse_error(info, ".comm: `,' expected"); info->p = skip_whitespaces(info->p + 1); - long count; + int64_t count; 
if (!immediate(&info->p, &count)) { parse_error(info, ".comm: count expected"); return; } - long align = 0; + int64_t align = 0; if (*info->p == ',') { info->p = skip_whitespaces(info->p + 1); if (!immediate(&info->p, &align) || align < 1) { @@ -1130,7 +597,7 @@ void handle_directive(ParseInfo *info, enum DirectiveType dir, Vector **section_ case DT_ALIGN: { - long align; + int64_t align; if (!immediate(&info->p, &align)) parse_error(info, ".align: number expected"); vec_push(irs, new_ir_align(align)); @@ -1138,7 +605,7 @@ void handle_directive(ParseInfo *info, enum DirectiveType dir, Vector **section_ break; case DT_P2ALIGN: { - long align; + int64_t align; if (!immediate(&info->p, &align)) parse_error(info, ".align: number expected"); vec_push(irs, new_ir_align(1 << align)); @@ -1276,3 +743,69 @@ void handle_directive(ParseInfo *info, enum DirectiveType dir, Vector **section_ break; } } + +Value calc_expr(Table *label_table, const Expr *expr) { + assert(expr != NULL); + switch (expr->kind) { + case EX_LABEL: + return (Value){.label = expr->label, .offset = 0}; + case EX_FIXNUM: + return (Value){.label = NULL, .offset = expr->fixnum}; + case EX_ADD: + case EX_SUB: + case EX_MUL: + case EX_DIV: + { + Value lhs = calc_expr(label_table, expr->bop.lhs); + Value rhs = calc_expr(label_table, expr->bop.rhs); + if (rhs.label != NULL) { + if (expr->kind == EX_SUB && lhs.label != NULL) { + LabelInfo *llabel, *rlabel; + if (table_try_get(label_table, lhs.label, (void**)&llabel) && + table_try_get(label_table, rhs.label, (void**)&rlabel)) { + return (Value){.label = NULL, .offset = llabel->address - rlabel->address}; + } else { + error("Unresolved"); + } + } + if (expr->kind != EX_ADD || lhs.label != NULL) { + error("Illegal expression"); + } + // offset + label + return (Value){.label = rhs.label, .offset = lhs.offset + rhs.offset}; + } + if (lhs.label != NULL) { + if (expr->kind != EX_ADD) { + error("Illegal expression"); + } + // label + offset + return (Value){.label = 
lhs.label, .offset = lhs.offset + rhs.offset}; + } + + assert(lhs.label == NULL && rhs.label == NULL); + switch (expr->kind) { + case EX_ADD: lhs.offset += rhs.offset; break; + case EX_SUB: lhs.offset -= rhs.offset; break; + case EX_MUL: lhs.offset *= rhs.offset; break; + case EX_DIV: lhs.offset /= rhs.offset; break; + default: assert(false); break; + } + return lhs; + } + + case EX_POS: + case EX_NEG: + { + Value value = calc_expr(label_table, expr->unary.sub); + if (value.label != NULL) { + error("Illegal expression"); + } + if (expr->kind == EX_NEG) + value.offset = -value.offset; + return value; + } + + default: assert(false); break; + } + return (Value){.label = NULL, .offset = 0}; +} diff --git a/src/as/parse_asm.h b/src/as/parse_asm.h index 5904d261d..9db85e4fc 100644 --- a/src/as/parse_asm.h +++ b/src/as/parse_asm.h @@ -3,14 +3,36 @@ #pragma once #include +#include // int64_t -#include "inst.h" // Inst, DirectiveType +#include "inst.h" // Inst typedef struct Name Name; typedef struct Table Table; typedef struct Token Token; typedef struct Vector Vector; +enum DirectiveType { + NODIRECTIVE, + DT_ASCII, + DT_SECTION, + DT_TEXT, + DT_DATA, + DT_ALIGN, + DT_P2ALIGN, + DT_TYPE, + DT_BYTE, + DT_SHORT, + DT_LONG, + DT_QUAD, + DT_COMM, + DT_GLOBL, + DT_LOCAL, + DT_EXTERN, + DT_FLOAT, + DT_DOUBLE, +}; + typedef struct ParseInfo { const char *filename; int lineno; @@ -27,6 +49,40 @@ typedef struct Line { enum DirectiveType dir; } Line; +enum ExprKind { + EX_LABEL, + EX_FIXNUM, + EX_POS, + EX_NEG, + EX_ADD, + EX_SUB, + EX_MUL, + EX_DIV, + EX_FLONUM, +}; + +#ifndef __NO_FLONUM +typedef long double Flonum; +#endif + +typedef struct Expr { + enum ExprKind kind; + union { + const Name *label; + int64_t fixnum; + struct { + struct Expr *lhs; + struct Expr *rhs; + } bop; + struct { + struct Expr *sub; + } unary; +#ifndef __NO_FLONUM + Flonum flonum; +#endif + }; +} Expr; + extern int current_section; // enum SectionType extern bool err; @@ -34,3 +90,16 @@ Line 
*parse_line(ParseInfo *info); void handle_directive(ParseInfo *info, enum DirectiveType dir, Vector **section_irs, Table *label_table); void parse_error(const ParseInfo *info, const char *message); +void parse_inst(ParseInfo *info, Inst *inst); + +bool immediate(const char **pp, int64_t *value); +const char *skip_until_delimiter(const char *p); +const Name *unquote_label(const char *p, const char *q); +Expr *parse_expr(ParseInfo *info); + +typedef struct { + const Name *label; + int64_t offset; +} Value; + +Value calc_expr(Table *label_table, const Expr *expr); diff --git a/src/cc/arch/riscv64/ir_riscv64.c b/src/cc/arch/riscv64/ir_riscv64.c index 03b1f2136..fcb957066 100644 --- a/src/cc/arch/riscv64/ir_riscv64.c +++ b/src/cc/arch/riscv64/ir_riscv64.c @@ -75,7 +75,7 @@ const RegAllocSettings kArchRegAllocSettings = { // -bool is_im12(intptr_t x) { +bool is_im12(int64_t x) { return x <= ((1L << 11) - 1) && x >= -(1L << 11); } @@ -112,7 +112,7 @@ static void ei_iofs(IR *ir) { static void ei_sofs(IR *ir) { assert(ir->opr1->flag & VRF_CONST); const char *dst = kReg64s[ir->dst->phys]; - int ofs = ir->opr1->fixnum; + int64_t ofs = ir->opr1->fixnum; if (is_im12(ofs)) { ADDI(dst, SP, IM(ofs)); } else { diff --git a/src/ld/ld.c b/src/ld/ld.c index 79645a70a..66a2c2830 100644 --- a/src/ld/ld.c +++ b/src/ld/ld.c @@ -475,7 +475,12 @@ static bool output_exe(const char *ofn, uintptr_t entry_address) { size_t rodata_align = MAX(section_aligns[SEC_RODATA], 1); size_t code_rodata_sz = ALIGN(codesz, rodata_align) + rodatasz; - out_elf_header(fp, entry_address, phnum, 0); +#if XCC_TARGET_ARCH == XCC_ARCH_RISCV64 + const int flags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE; +#else + const int flags = 0; +#endif + out_elf_header(fp, entry_address, phnum, 0, flags); out_program_header(fp, 0, PROG_START, codeloadadr, code_rodata_sz, code_rodata_sz); if (phnum > 1) { size_t bss_align = MAX(section_aligns[SEC_BSS], 1); diff --git a/src/util/elfutil.c b/src/util/elfutil.c index 
6d05271a0..9fd4ca4ef 100644 --- a/src/util/elfutil.c +++ b/src/util/elfutil.c @@ -72,7 +72,7 @@ Elf64_Sym *symtab_add(Symtab *symtab, const Name *name) { // -void out_elf_header(FILE *fp, uintptr_t entry, int phnum, int shnum) { +void out_elf_header(FILE *fp, uintptr_t entry, int phnum, int shnum, int flags) { Elf64_Ehdr ehdr = { .e_ident = { ELFMAG0, ELFMAG1, ELFMAG2 ,ELFMAG3, ELFCLASS64, ELFDATA2LSB, EV_CURRENT, ELFOSABI_SYSV }, @@ -82,7 +82,7 @@ void out_elf_header(FILE *fp, uintptr_t entry, int phnum, int shnum) { .e_entry = entry, .e_phoff = phnum > 0 ? sizeof(Elf64_Ehdr) : 0, .e_shoff = 0, // dummy - .e_flags = 0x0, + .e_flags = flags, .e_ehsize = sizeof(Elf64_Ehdr), .e_phentsize = phnum > 0 ? sizeof(Elf64_Phdr) : 0, .e_phnum = phnum, diff --git a/src/util/elfutil.h b/src/util/elfutil.h index 76b22d500..4ab2027e4 100644 --- a/src/util/elfutil.h +++ b/src/util/elfutil.h @@ -37,6 +37,6 @@ Elf64_Sym *symtab_add(Symtab *symtab, const Name *name); // -void out_elf_header(FILE *fp, uintptr_t entry, int phnum, int shnum); +void out_elf_header(FILE *fp, uintptr_t entry, int phnum, int shnum, int flags); void out_program_header(FILE *fp, int sec, uintptr_t offset, uintptr_t vaddr, size_t filesz, size_t memsz);