diff --git a/Makefile b/Makefile index fe9b5f227..b4f0cd1c6 100644 --- a/Makefile +++ b/Makefile @@ -16,8 +16,6 @@ LIB_DIR:=lib # NO_VLA:=1 # NO_WCHAR:=1 -# HOST_CC_PREFIX=riscv64-unknown-elf- - ifeq ("$(ARCHTYPE)", "") ARCHTYPE:=x64 ARCH:=$(shell arch) @@ -52,6 +50,10 @@ ifneq ("$(NO_WCHAR)","") CFLAGS+=-D__NO_WCHAR endif +ifneq ("$(PLATFORM)","") +PLATFORM_UPPER:=$(shell echo "$(PLATFORM)" | tr \'[a-z]\' \'[A-Z]\') +CFLAGS+=-DXCC_TARGET_PLATFORM=XCC_PLATFORM_$(PLATFORM_UPPER) +endif ifneq ("$(HOST_CC_PREFIX)","") CFLAGS+=-DHOST_CC_PREFIX=$(HOST_CC_PREFIX) endif @@ -137,6 +139,13 @@ clean: @$(MAKE) -C libsrc clean @$(MAKE) -C tests clean +# Run tests on RISC-V simulator. +.PHONY: test-riscv64 +test-riscv64: + $(MAKE) ARCHTYPE:=riscv64 PLATFORM:=posix HOST_CC_PREFIX=riscv64-unknown-elf- + $(MAKE) -C tests clean && \ + $(MAKE) RUN_EXE="$(CURDIR)/tool/run-riscv64" NO_LINK_TEST=1 -C tests all + ### Library .PHONY: libs diff --git a/include/math.h b/include/math.h index b6289ff4f..f402dfd91 100644 --- a/include/math.h +++ b/include/math.h @@ -47,7 +47,7 @@ int isfinite(double x); int isnan(double x); int isinf(double x); -#if defined(__APPLE__) || defined(__GNUC__) +#if defined(__APPLE__) || defined(__GNUC__) || defined(__riscv) // isfinite, isinf and isnan is defined by macro and not included in lib file, // so it will be link error. #include diff --git a/include/stdarg.h b/include/stdarg.h index 265915c3f..8941176ac 100644 --- a/include/stdarg.h +++ b/include/stdarg.h @@ -22,6 +22,14 @@ typedef void **va_list; #define va_arg(ap, type) (*(type*)(ap)++) // Assume little endian #define va_copy(dst,src) (dst = src) +#elif defined(__riscv) +typedef void **va_list; + +#define va_start(ap,p) __builtin_va_start(ap,&(p)) +#define va_end(ap) /*(void)*/(ap = 0) +#define va_arg(ap, type) ((ap) += 1, *(type*)((ap) - 1)) // Assume little endian +#define va_copy(dst,src) (dst = src) + #else // not __APPLE__ nor __aarch64__ #include @@ -43,7 +51,7 @@ typedef __gnuc_va_list va_list; #define va_arg(ap,ty) __builtin_va_arg(ap,ty) #define va_copy(dst,src) __builtin_va_copy(dst,src) -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__riscv) #define __GP_REG_ARGS (8) #else #define __GP_REG_ARGS (6) diff --git a/include/stdio.h b/include/stdio.h index 81558451b..4b0dde02b 100644 --- a/include/stdio.h +++ b/include/stdio.h @@ -21,6 +21,23 @@ extern FILE *__stderrp; #define stdin __stdinp #define stdout __stdoutp #define stderr __stderrp + +#elif defined(__riscv) + +// Must match with newlib +struct _reent +{ + int _errno; + + struct FILE *_stdin, *_stdout, *_stderr; +}; + +extern struct _reent *_impure_ptr; + +#define stdin (_impure_ptr->_stdin) +#define stdout (_impure_ptr->_stdout) +#define stderr (_impure_ptr->_stderr) + #else extern FILE *stdin; extern FILE *stdout; diff --git a/src/cc/arch/riscv64/arch_config.h b/src/cc/arch/riscv64/arch_config.h new file mode 100644 index 000000000..75154db53 --- /dev/null +++ b/src/cc/arch/riscv64/arch_config.h @@ -0,0 +1,13 @@ +#pragma once + +// Configuration for riscv64 + +#define MAX_REG_ARGS (8) +#define MAX_FREG_ARGS (8) + +#define PHYSICAL_REG_TEMPORARY (8) +#define PHYSICAL_REG_MAX (PHYSICAL_REG_TEMPORARY + 14) +#define PHYSICAL_FREG_TEMPORARY (8) +#define PHYSICAL_FREG_MAX (PHYSICAL_FREG_TEMPORARY + 24) + +#define GET_FPREG_INDEX() 18 diff --git a/src/cc/arch/riscv64/emit_code.c b/src/cc/arch/riscv64/emit_code.c new file mode 100644 index 000000000..decb2bf77 --- /dev/null +++ b/src/cc/arch/riscv64/emit_code.c @@ -0,0 +1,601 @@ +#include 
"../../../config.h" +#include "./arch_config.h" +#include "emit_code.h" + +#include +#include // PRId64 +#include +#include + +#include "ast.h" +#include "codegen.h" +#include "initializer.h" // calc_bitfield_initial_value +#include "ir.h" +#include "lexer.h" +#include "regalloc.h" +#include "riscv64.h" +#include "table.h" +#include "type.h" +#include "util.h" +#include "var.h" + +char *im(int64_t x) { + return fmt("%" PRId64, x); +} + +char *immediate_offset(int offset, const char *reg) { + return offset != 0 ? fmt("%d(%s)", offset, reg) : fmt("(%s)", reg); +} + +char *label_offset_hi(char *label) { + return fmt("%%hi(%s)", label); +} + +char *label_offset_lo(char *label) { + return fmt("%%lo(%s)", label); +} + +//////// + +static void eval_initial_value(Expr *expr, Expr **pvar, Fixnum *poffset) { + switch (expr->kind) { + case EX_FIXNUM: + *poffset = expr->fixnum; + break; + case EX_VAR: + assert(*pvar == NULL); + *pvar = expr; + break; + case EX_ADD: + case EX_SUB: + { + Expr *var1 = NULL, *var2 = NULL; + Fixnum offset1 = 0, offset2 = 0; + eval_initial_value(expr->bop.lhs, &var1, &offset1); + eval_initial_value(expr->bop.rhs, &var2, &offset2); + if (var1 != NULL) { + assert(var2 == NULL); + *pvar = var1; + } else if (var2 != NULL) { + assert(expr->kind == EX_ADD); + *pvar = var2; + } + if (expr->kind == EX_SUB) + offset2 = -offset2; + *poffset = offset1 + offset2; + } + break; + case EX_REF: + case EX_DEREF: + case EX_CAST: + eval_initial_value(expr->unary.sub, pvar, poffset); + break; + case EX_MEMBER: + { + eval_initial_value(expr->member.target, pvar, poffset); + const MemberInfo *minfo = expr->member.info; + *poffset += minfo->offset; + } + break; + case EX_COMPLIT: + assert(expr->complit.var->kind == EX_VAR); + eval_initial_value(expr->complit.var, pvar, poffset); + break; + // case EX_STR: // should be handled in parser. + default: assert(!"illegal"); break; + } +} + +#ifndef __NO_BITFIELD +static int construct_initial_value_bitfield(const StructInfo *sinfo, const Initializer *init, + int start, int *poffset) { + const MemberInfo *member = &sinfo->members[start]; + if (member->bitfield.width == 0) + return start; + + const Type *et = get_fixnum_type(member->bitfield.base_kind, false, 0); + int offset = *poffset; + int align = align_size(et); + if (offset % align != 0) { + EMIT_ALIGN(align); + offset = ALIGN(offset, align); + } + + int i = start; + Fixnum x = calc_bitfield_initial_value(sinfo, init, &i); + + const char *output = NUM(x); + switch (et->fixnum.kind) { + case FX_CHAR: _BYTE(output); break; + case FX_SHORT: _WORD(output); break; + case FX_LONG: case FX_LLONG: + _QUAD(output); + break; + case FX_INT: case FX_ENUM: + _LONG(output); + break; + } + *poffset = offset += type_size(et); + + return i; +} +#endif + +static void construct_initial_value(const Type *type, const Initializer *init) { + assert(init == NULL || init->kind != IK_DOT); + + switch (type->kind) { + case TY_FLONUM: +#ifndef __NO_FLONUM + switch (type->flonum.kind) { + case FL_DOUBLE: + case FL_LDOUBLE: // long-double in XCC is same as double. 
+ { + union {double f; uint64_t h;} v; + v.f = 0; + if (init != NULL) { + assert(init->kind == IK_SINGLE); + Expr *value = init->single; + if (!(is_const(value) && is_flonum(value->type))) + error("Illegal initializer: constant number expected"); + v.f = value->flonum; + } +#if 0 + _DOUBLE(FLONUM(v.d)); +#else + _QUAD(HEXNUM(v.h)); +#endif + } + break; + case FL_FLOAT: + { + union {float f; uint32_t h;} v; + v.f = 0; + if (init != NULL) { + assert(init->kind == IK_SINGLE); + Expr *value = init->single; + if (!(is_const(value) && is_flonum(value->type))) + error("Illegal initializer: constant number expected"); + v.f = value->flonum; + } +#if 0 + _FLOAT(FLONUM(v.f)); +#else + _LONG(HEXNUM(v.h)); +#endif + } + break; + } +#else + assert(false); +#endif + break; + case TY_FIXNUM: + case TY_PTR: + { + Expr *var = NULL; + Fixnum offset = 0; + if (init != NULL) { + assert(init->kind == IK_SINGLE); + eval_initial_value(init->single, &var, &offset); + } + const char *output; + if (var == NULL) { + output = NUM(offset); + } else { + const Name *name = var->var.name; + Scope *scope; + VarInfo *varinfo = scope_find(var->var.scope, name, &scope); + assert(varinfo != NULL); + if (!is_global_scope(scope) && varinfo->storage & VS_STATIC) { + varinfo = varinfo->static_.gvar; + assert(varinfo != NULL); + name = varinfo->name; + } + + char *label = fmt_name(name); + if ((varinfo->storage & VS_STATIC) == 0) + label = MANGLE(label); + label = quote_label(label); + + if (offset == 0) { + output = label; + } else { + output = fmt("%s + %" PRId64, label, offset); + } + } + if (type->kind == TY_PTR) { + _QUAD(output); + } else { + switch (type->fixnum.kind) { + case FX_CHAR: _BYTE(output); break; + case FX_SHORT: _WORD(output); break; + case FX_LONG: case FX_LLONG: + _QUAD(output); + break; + case FX_INT: case FX_ENUM: + _LONG(output); + break; + } + } + } + break; + case TY_ARRAY: + if (init == NULL || init->kind == IK_MULTI) { + const Type *elem_type = type->pa.ptrof; + ssize_t index = 0; + if (init != NULL) { + Vector *init_array = init->multi; + for (ssize_t i = 0; i < init_array->len; ++i, ++index) { + const Initializer *init_elem = init_array->data[i]; + construct_initial_value(elem_type, init_elem); + } + } + // Padding + for (ssize_t i = index, n = type->pa.length; i < n; ++i) + construct_initial_value(elem_type, NULL); + break; + } + if (init->kind == IK_SINGLE) { + Expr *e = strip_cast(init->single); + if (e->kind == EX_STR && is_char_type(type->pa.ptrof, e->str.kind)) { + size_t src_size = e->str.len * type_size(e->type->pa.ptrof); + size_t size = type_size(type); + if (src_size > size) + src_size = size; + + UNUSED(size); + StringBuffer sb; + sb_init(&sb); + sb_append(&sb, "\"", NULL); + escape_string(e->str.buf, src_size, &sb); + if (size > src_size) { + const char NULCHR[] = "\\0"; + for (size_t i = 0, n = size - src_size; i < n; ++i) + sb_append(&sb, NULCHR, NULL); + } + sb_append(&sb, "\"", NULL); + _ASCII(sb_to_string(&sb)); + break; + } + } + error("Illegal initializer"); + break; + case TY_STRUCT: + { + const StructInfo *sinfo = type->struct_.info; + assert(init == NULL || (init->kind == IK_MULTI && init->multi->len == sinfo->member_count)); + int count = 0; + int offset = 0; + for (int i = 0, n = sinfo->member_count; i < n; ++i) { + const MemberInfo *member = &sinfo->members[i]; +#ifndef __NO_BITFIELD + if (member->bitfield.width >= 0) { + i = construct_initial_value_bitfield(sinfo, init, i, &offset); + ++count; + continue; + } +#endif + const Initializer *mem_init; + if (init == NULL) { + if 
(sinfo->is_union) + continue; + mem_init = NULL; + } else { + mem_init = init->multi->data[i]; + } + if (mem_init != NULL || !sinfo->is_union) { + int align = align_size(member->type); + if (offset % align != 0) { + EMIT_ALIGN(align); + offset = ALIGN(offset, align); + } + construct_initial_value(member->type, mem_init); + ++count; + offset += type_size(member->type); + } + } + if (sinfo->is_union && count <= 0) { + const MemberInfo *member = &sinfo->members[0]; + construct_initial_value(member->type, NULL); + offset += type_size(member->type); + } + + size_t size = type_size(type); + if (size != (size_t)offset) { + // Put padding. + int d = size - offset; + switch (d) { + case 1: _BYTE(NUM(0)); break; + case 2: _WORD(NUM(0)); break; + case 4: _LONG(NUM(0)); break; + case 8: _QUAD(NUM(0)); break; + default: + for (int i = 0; i < d; ++i) + _BYTE(NUM(0)); + break; + } + } + } + break; + case TY_FUNC: case TY_VOID: assert(false); break; + } +} + +static void emit_varinfo(const VarInfo *varinfo, const Initializer *init) { + const Name *name = varinfo->name; + if (init != NULL) { + if (varinfo->type->qualifier & TQ_CONST) + _RODATA(); + else + _DATA(); + } + + char *label = fmt_name(name); + if ((varinfo->storage & VS_STATIC) == 0) { // global + label = quote_label(MANGLE(label)); + _GLOBL(label); + } else { + label = quote_label(label); + _LOCAL(label); + } + + if (init != NULL) { + EMIT_ALIGN(align_size(varinfo->type)); + EMIT_LABEL(label); + construct_initial_value(varinfo->type, init); + } else { + size_t size = type_size(varinfo->type); + if (size < 1) + size = 1; + + size_t align = align_size(varinfo->type); + _BSS(label, size, align); + } +} + +//////////////////////////////////////////////// + +static const char *kRegParam64s[] = {A0, A1, A2, A3, A4, A5, A6, A7}; + +static bool is_asm(Stmt *stmt) { + return stmt->kind == ST_ASM; +} + +static int put_vaarg_params(Function *func) { + assert(func->type->func.vaargs); +#if VAARG_ON_STACK + return; +#else + RegParamInfo iparams[MAX_REG_ARGS]; + RegParamInfo fparams[MAX_FREG_ARGS]; + int iparam_count = 0; + int fparam_count = 0; + enumerate_register_params(func, iparams, MAX_REG_ARGS, fparams, MAX_FREG_ARGS, + &iparam_count, &fparam_count); + + int size = 0; + int n = MAX_REG_ARGS - iparam_count; + if (n > 0) { + size = n * POINTER_SIZE; + ADDI(SP, SP, IM(-size)); + for (int i = iparam_count, offset = 0; i < MAX_REG_ARGS; ++i, offset += POINTER_SIZE) + SD(kRegParam64s[i], IMMEDIATE_OFFSET(offset, SP)); + } + return size; +#endif +} + +static void move_params_to_assigned(Function *func) { + extern const char *kReg64s[]; + extern const int ArchRegParamMapping[]; + extern const char *kFReg64s[]; + + // static const char *kRegParam32s[] = {W0, W1, W2, W3, W4, W5, W6, W7}; + // static const char **kRegParamTable[] = {kRegParam32s, kRegParam32s, kRegParam32s, kRegParam64s}; + // const char *kFRegParam32s[] = {S0, S1, S2, S3, S4, S5, S6, S7}; + const char *kFRegParam64s[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; + static const int kPow2Table[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3}; +#define kPow2TableSize ((int)(sizeof(kPow2Table) / sizeof(*kPow2Table))) + + RegParamInfo iparams[MAX_REG_ARGS]; + RegParamInfo fparams[MAX_FREG_ARGS]; + int iparam_count = 0; + int fparam_count = 0; + enumerate_register_params(func, iparams, MAX_REG_ARGS, fparams, MAX_FREG_ARGS, + &iparam_count, &fparam_count); + + // Generate code to store parameters to the destination. 
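The loop below selects the store width by table lookup; a small standalone sketch of that mapping, using the same kPow2Table values (kStoreOps is an illustrative name, not from the patch):

#include <assert.h>
#include <stdio.h>

// Map a parameter size in bytes to log2(size), then to the store mnemonic.
// Sizes other than 1, 2, 4, 8 yield -1 and are rejected by the assert,
// just as in move_params_to_assigned.
static const int kPow2Table[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
static const char *kStoreOps[] = {"sb", "sh", "sw", "sd"};

int main(void) {
  for (int size = 1; size <= 8; size <<= 1) {
    int pow = kPow2Table[size];
    assert(pow >= 0);
    printf("size %d -> %s\n", size, kStoreOps[pow]);
  }
  return 0;
}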
+ for (int i = 0; i < iparam_count; ++i) { + RegParamInfo *p = &iparams[i]; + VReg *vreg = p->vreg; + size_t size = type_size(p->type); + assert(0 < size && size < kPow2TableSize && kPow2Table[size] >= 0); + int pow = kPow2Table[size]; + const char *src = kReg64s[p->index]; + if (vreg->flag & VRF_SPILLED) { + int offset = vreg->frame.offset; + assert(offset != 0); + const char *dst; + // if (offset >= -256) { + dst = IMMEDIATE_OFFSET(offset, FP); + // } else { + // mov_immediate(X9, offset, true, false); // x9 broken. + // dst = REG_OFFSET(FP, X9, NULL); + // } + switch (pow) { + case 0: SB(src, dst); break; + case 1: SH(src, dst); break; + case 2: SW(src, dst); break; + case 3: SD(src, dst); break; + default: assert(false); break; + } + } else if (ArchRegParamMapping[p->index] != vreg->phys) { + const char *dst = kReg64s[vreg->phys]; + MV(dst, src); + } + } + for (int i = 0; i < fparam_count; ++i) { + RegParamInfo *p = &fparams[i]; + VReg *vreg = p->vreg; + const char *src = kFRegParam64s[p->index]; + if (vreg->flag & VRF_SPILLED) { + int offset = vreg->frame.offset; + assert(offset != 0); + assert(offset != 0); + SD(src, IMMEDIATE_OFFSET(offset, FP)); + } else { + if (p->index != vreg->phys) { + const char *dst = kFReg64s[vreg->phys]; + FMV_D(dst, src); + } + } + } +} + +static void emit_defun(Function *func) { + if (func->scopes == NULL || // Prototype definition. + func->extra == NULL) // Code emission is omitted. + return; + + emit_comment(NULL); + _TEXT(); + + bool global = true; + const VarInfo *varinfo = scope_find(global_scope, func->name, NULL); + if (varinfo != NULL) { + global = (varinfo->storage & VS_STATIC) == 0; + } + + char *label = fmt_name(func->name); + if (global) { + label = quote_label(MANGLE(label)); + _GLOBL(label); + } else { + emit_comment("%.*s: static func", NAMES(func->name)); + label = quote_label(label); + _LOCAL(label); + } + EMIT_ALIGN(2); + EMIT_LABEL(label); + + bool no_stmt = true; + if (func->body_block != NULL) { + Vector *stmts = func->body_block->block.stmts; + for (int i = 0; i < stmts->len; ++i) { + Stmt *stmt = stmts->data[i]; + if (stmt == NULL) + continue; + if (!is_asm(stmt)) { + no_stmt = false; + break; + } + } + } + + // Prologue + // Allocate variable bufer. + FuncBackend *fnbe = func->extra; + size_t frame_size = ALIGN(fnbe->frame_size, 16); + bool fp_saved = false; // Frame pointer saved? + bool ra_saved = false; // Return Address register saved? + unsigned long used_reg_bits = fnbe->ra->used_reg_bits; + int vaarg_params_saved = 0; + if (!no_stmt) { + if (func->type->func.vaargs) { + vaarg_params_saved = put_vaarg_params(func); + + // Re-align frame size. + frame_size = ALIGN(fnbe->frame_size + vaarg_params_saved, 16) - vaarg_params_saved; + } + + fp_saved = frame_size > 0 || fnbe->ra->flag & RAF_STACK_FRAME; + ra_saved = (func->flag & FUNCF_HAS_FUNCALL) != 0; + + // TODO: Handle fp_saved and ra_saved individually. + if (fp_saved || ra_saved) { + // STP(FP, LR, PRE_INDEX(SP, -16)); + ADDI(SP, SP, IM(-16)); + SD(RA, IMMEDIATE_OFFSET(8, SP)); + SD(FP, IMMEDIATE_OFFSET0(SP)); + + // FP is saved, so omit from callee save. + used_reg_bits &= ~(1UL << GET_FPREG_INDEX()); + } + + // Callee save. 
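A sketch of the frame being built around this point, assuming the layout emitted above and below (align16 is an illustrative helper standing in for ALIGN(frame_size, 16)):

#include <stdio.h>

// Rough shape of the prologue, not the exact output:
//   addi sp, sp, -16          # slots for ra/fp
//   sd   ra, 8(sp)
//   sd   fp, 0(sp)
//   ...callee-saved registers pushed in 16-byte-aligned pairs...
//   mv   fp, sp
//   addi sp, sp, -frame_size  # frame_size is kept 16-byte aligned:
static long align16(long n) { return (n + 15) & ~15L; }

int main(void) {
  printf("%ld %ld %ld\n", align16(0), align16(40), align16(48));  // 0 48 48
  return 0;
}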
+ push_callee_save_regs(used_reg_bits, fnbe->ra->used_freg_bits); + + if (fp_saved) { + MV(FP, SP); + if (frame_size > 0) { + const char *value; + // if (frame_size <= 0x0fff) { + value = IM(-frame_size); + ADDI(SP, SP, value); + // } else { + // // Break x17 + // mov_immediate(value = X17, frame_size, true, false); + // } + } + } + + move_params_to_assigned(func); + } + + emit_bb_irs(fnbe->bbcon); + + if (!function_not_returned(fnbe)) { + // Epilogue + if (!no_stmt) { + if (fp_saved) + MV(SP, FP); + + pop_callee_save_regs(used_reg_bits, fnbe->ra->used_freg_bits); + + if (fp_saved || ra_saved) { + LD(FP, IMMEDIATE_OFFSET0(SP)); + LD(RA, IMMEDIATE_OFFSET(8, SP)); + ADD(SP, SP, IM(16)); + } + } + if (vaarg_params_saved > 0) + ADD(SP, SP, IM(vaarg_params_saved)); + + RET(); + } + + // Static variables are emitted through global variables. +} + +static void emit_asm(Expr *asmstr) { + assert(asmstr->kind == EX_STR); + EMIT_ASM(asmstr->str.buf); +} + +void emit_code(Vector *decls) { + for (int i = 0, len = decls->len; i < len; ++i) { + Declaration *decl = decls->data[i]; + if (decl == NULL) + continue; + + switch (decl->kind) { + case DCL_DEFUN: + emit_defun(decl->defun.func); + break; + case DCL_VARDECL: + break; + case DCL_ASM: + emit_asm(decl->asmstr); + break; + } + } + + emit_comment(NULL); + for (int i = 0; i < global_scope->vars->len; ++i) { + VarInfo *varinfo = global_scope->vars->data[i]; + if ((varinfo->storage & (VS_EXTERN | VS_ENUM_MEMBER)) || varinfo->type->kind == TY_FUNC) + continue; + emit_varinfo(varinfo, varinfo->global.init); + } +} diff --git a/src/cc/arch/riscv64/emit_code.h b/src/cc/arch/riscv64/emit_code.h new file mode 100644 index 000000000..8167771cc --- /dev/null +++ b/src/cc/arch/riscv64/emit_code.h @@ -0,0 +1,14 @@ +// Emit code + +#pragma once + +#include // int64_t + +typedef struct Vector Vector; + +void emit_code(Vector *decls); + +char *im(int64_t x); +char *immediate_offset(int offset, const char *reg); +char *label_offset_hi(char *label); +char *label_offset_lo(char *label); diff --git a/src/cc/arch/riscv64/ir_riscv64.c b/src/cc/arch/riscv64/ir_riscv64.c new file mode 100644 index 000000000..6b3aa3b36 --- /dev/null +++ b/src/cc/arch/riscv64/ir_riscv64.c @@ -0,0 +1,1114 @@ +#include "../../../config.h" +#include "./arch_config.h" +#include "ir.h" + +#include +#include // malloc +#include + +#include "ast.h" +#include "emit_code.h" +#include "regalloc.h" +#include "riscv64.h" +#include "table.h" +#include "util.h" + +static Vector *push_caller_save_regs(unsigned long living); +static void pop_caller_save_regs(Vector *saves); + +// Register allocator + +// AArch64: Calling Convention +// X8(XR): Indirect return value address. +// X16(IP0), X17(IP1): Intra-Procedure-call scratch registers. +// X18(PR): Platform register. Used for some operating-system-specific special purpose or an additional caller-saved register. 
+// X29(FP): Frame pointer (Callee save) + +// static const char *kReg32s[PHYSICAL_REG_MAX] = { +// W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W16, // Temporary +// W19, W20, W21, W22, W23, W24, W25, W26, W27, W28, W29, // Callee save +// W10, W11, W12, W13, W14, W15, W18}; // Caller save +// static const char *kReg64s[PHYSICAL_REG_MAX] = { +// X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X16, // Temporary +// X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, // Callee save +// X10, X11, X12, X13, X14, X15, X18}; // Caller save +const char *kReg64s[PHYSICAL_REG_MAX] = { + A0, A1, A2, A3, A4, A5, A6, A7, // Temporary + S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, FP, // Callee save + T0, T1, T2}; // Caller save + +#define GET_A0_INDEX() 0 +// #define GET_X16_INDEX() 10 + +#define CALLEE_SAVE_REG_COUNT ((int)(sizeof(kCalleeSaveRegs) / sizeof(*kCalleeSaveRegs))) +static const int kCalleeSaveRegs[] = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + +#define CALLER_SAVE_REG_COUNT ((int)(sizeof(kCallerSaveRegs) / sizeof(*kCallerSaveRegs))) +static const int kCallerSaveRegs[] = {19, 20, 21}; + +const int ArchRegParamMapping[] = {0, 1, 2, 3, 4, 5, 6, 7}; + +// const char **kRegSizeTable[] = {kReg32s, kReg32s, kReg32s, kReg64s}; +// static const char *kZeroRegTable[] = {WZR, WZR, WZR, XZR}; + +// Break s1 in store, mod and tjmp +static const char *kTmpReg = S1; + +#define SZ_FLOAT VRegSize4 +#define SZ_DOUBLE VRegSize8 +const char *kFReg64s[PHYSICAL_FREG_MAX] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7, + FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11, + FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7, FT8, FT9, FT10, FT11, +}; +#define kFReg32s kFReg64s + +#define GET_FA0_INDEX() 0 + +#define CALLEE_SAVE_FREG_COUNT ((int)(sizeof(kCalleeSaveFRegs) / sizeof(*kCalleeSaveFRegs))) +static const int kCalleeSaveFRegs[] = {8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19}; + +#define CALLER_SAVE_FREG_COUNT ((int)(sizeof(kCallerSaveFRegs) / sizeof(*kCallerSaveFRegs))) +static const int kCallerSaveFRegs[] = {20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + +static unsigned long detect_extra_occupied(RegAlloc *ra, IR *ir) { + UNUSED(ir); + unsigned long ioccupy = 0; + // switch (ir->kind) { + // case IR_JMP: case IR_TJMP: case IR_CALL: + // ioccupy = 1UL << GET_X16_INDEX(); + // break; + // default: break; + // } + if (ra->flag & RAF_STACK_FRAME) + ioccupy |= 1UL << GET_FPREG_INDEX(); + return ioccupy; +} + +const RegAllocSettings kArchRegAllocSettings = { + .detect_extra_occupied = detect_extra_occupied, + .reg_param_mapping = ArchRegParamMapping, + .phys_max = PHYSICAL_REG_MAX, + .phys_temporary_count = PHYSICAL_REG_TEMPORARY, +#ifndef __NO_FLONUM + .fphys_max = PHYSICAL_FREG_MAX, + .fphys_temporary_count = PHYSICAL_FREG_TEMPORARY, +#endif +}; + +// + +bool is_im12(intptr_t x) { + return x <= ((1L << 11) - 1) && x >= -(1L << 11); +} + +void mov_immediate(const char *dst, int64_t value, bool is_unsigned) { + UNUSED(is_unsigned); + LI(dst, IM(value)); +} + +static void ei_bofs(IR *ir) { + const char *dst = kReg64s[ir->dst->phys]; + int ofs = ir->bofs.frameinfo->offset; + // if (ofs < 4096 && ofs > -4096) { + ADDI(dst, FP, IM(ofs)); + // } else { + // mov_immediate(dst, ofs, true, false); + // ADD(dst, dst, FP); + // } +} + +static void ei_iofs(IR *ir) { + char *label = fmt_name(ir->iofs.label); + if (ir->iofs.global) + label = MANGLE(label); + label = quote_label(label); + const char *dst = kReg64s[ir->dst->phys]; + // if (!is_got(ir->iofs.label)) { + LUI(dst, LABEL_OFFSET_HI(label)); + ADDI(dst, dst, 
LABEL_OFFSET_LO(label)); + // } else { + // ADRP(dst, LABEL_AT_GOTPAGE(label)); + // LDR(dst, fmt("[%s,#%s]", dst, LABEL_AT_GOTPAGEOFF(label))); + // } +} + +static void ei_sofs(IR *ir) { + assert(ir->opr1->flag & VRF_CONST); + const char *dst = kReg64s[ir->dst->phys]; + // int ofs = ir->opr1->frame.offset; + // if (ofs < 4096 && ofs > -4096) { + ADDI(dst, SP, IM(ir->opr1->fixnum)); + // } else { + // mov_immediate(dst, ofs, true, false); + // ADD(dst, dst, SP); + // } +} + +#define ei_load_s ei_load +static void ei_load(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + const char *src; + if (ir->kind == IR_LOAD) { + assert(!(ir->opr1->flag & VRF_SPILLED)); + src = IMMEDIATE_OFFSET0(kReg64s[ir->opr1->phys]); + } else { + assert(ir->opr1->flag & VRF_SPILLED); + if (ir->opr1->frame.offset >= -4096 && ir->opr1->frame.offset <= 4096) { + src = IMMEDIATE_OFFSET(ir->opr1->frame.offset, FP); + } else { + mov_immediate(kTmpReg, ir->opr1->frame.offset, false); + ADD(kTmpReg, kTmpReg, FP); + src = IMMEDIATE_OFFSET0(kTmpReg); + } + } + + const char *dst; + if (ir->dst->flag & VRF_FLONUM) { + switch (ir->dst->vsize) { + case SZ_FLOAT: FLW(kFReg32s[ir->dst->phys], src); break; + case SZ_DOUBLE: FLD(kFReg64s[ir->dst->phys], src); break; + default: assert(false); break; + } + } else { + int pow = ir->dst->vsize; + assert(0 <= pow && pow < 4); + dst = kReg64s[ir->dst->phys]; + switch (pow) { + case 0: + if (ir->flag & IRF_UNSIGNED) LBU(dst, src); + else LB(dst, src); + break; + case 1: + if (ir->flag & IRF_UNSIGNED) LHU(dst, src); + else LH(dst, src); + break; + case 2: + if (ir->flag & IRF_UNSIGNED) LWU(dst, src); + else LW(dst, src); + break; + case 3: + LD(dst, src); + break; + default: assert(false); break; + } + } +} + +#define ei_store_s ei_store +static void ei_store(IR *ir) { + assert(!(ir->opr2->flag & VRF_CONST)); + const char *target; + if (ir->kind == IR_STORE) { + assert(!(ir->opr2->flag & VRF_SPILLED)); + target = IMMEDIATE_OFFSET0(kReg64s[ir->opr2->phys]); + } else { + assert(ir->opr2->flag & VRF_SPILLED); + if (ir->opr2->frame.offset >= -4096 && ir->opr2->frame.offset <= 4096) { + target = IMMEDIATE_OFFSET(ir->opr2->frame.offset, FP); + } else { + mov_immediate(kTmpReg, ir->opr2->frame.offset, false); + ADD(kTmpReg, kTmpReg, FP); + target = IMMEDIATE_OFFSET0(kTmpReg); + } + } + const char *src; + if (ir->opr1->flag & VRF_FLONUM) { + switch (ir->opr1->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FSW(kFReg32s[ir->opr1->phys], target); break; + case SZ_DOUBLE: FSD(kFReg64s[ir->opr1->phys], target); break; + } + return; + } else if (ir->opr1->flag & VRF_CONST) { + if (ir->opr1->fixnum == 0) + src = ZERO; + else + mov_immediate(src = kTmpReg, ir->opr1->fixnum, ir->flag & IRF_UNSIGNED); + } else { + src = kReg64s[ir->opr1->phys]; + } + switch (ir->opr1->vsize) { + case 0: SB(src, target); break; + case 1: SH(src, target); break; + case 2: SW(src, target); break; + case 3: SD(src, target); break; + default: assert(false); break; + } +} + +static void ei_add(IR *ir) { + if (ir->dst->flag & VRF_FLONUM) { + switch (ir->dst->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FADD_S(kFReg32s[ir->dst->phys], kFReg32s[ir->opr1->phys], kFReg32s[ir->opr2->phys]); break; + case SZ_DOUBLE: FADD_D(kFReg64s[ir->dst->phys], kFReg64s[ir->opr1->phys], kFReg64s[ir->opr2->phys]); break; + } + } else { + assert(!(ir->opr1->flag & VRF_CONST)); + const char *dst = kReg64s[ir->dst->phys]; + if (ir->dst->vsize <= 2 && !(ir->flag & IRF_UNSIGNED)) { + if (ir->opr2->flag & 
VRF_CONST) { + ADDIW(dst, kReg64s[ir->opr1->phys], IM(ir->opr2->fixnum)); + } else { + ADDW(dst, kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } + } else { + if (ir->opr2->flag & VRF_CONST) { + ADDI(dst, kReg64s[ir->opr1->phys], IM(ir->opr2->fixnum)); + } else { + ADD(dst, kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } + } + } +} + +static void ei_sub(IR *ir) { + if (ir->dst->flag & VRF_FLONUM) { + switch (ir->dst->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FSUB_S(kFReg32s[ir->dst->phys], kFReg32s[ir->opr1->phys], kFReg32s[ir->opr2->phys]); break; + case SZ_DOUBLE: FSUB_D(kFReg64s[ir->dst->phys], kFReg64s[ir->opr1->phys], kFReg64s[ir->opr2->phys]); break; + } + } else { + assert(!(ir->opr1->flag & VRF_CONST)); + const char *dst = kReg64s[ir->dst->phys]; + if (ir->dst->vsize <= 2 && !(ir->flag & IRF_UNSIGNED)) { + if (ir->opr2->flag & VRF_CONST) { + ADDIW(dst, kReg64s[ir->opr1->phys], IM(-ir->opr2->fixnum)); + } else { + SUBW(dst, kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } + } else { + if (ir->opr2->flag & VRF_CONST) { + ADDI(dst, kReg64s[ir->opr1->phys], IM(-ir->opr2->fixnum)); + } else { + SUB(dst, kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } + } + } +} + +static void ei_mul(IR *ir) { + if (ir->dst->flag & VRF_FLONUM) { + switch (ir->dst->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FMUL_S(kFReg32s[ir->dst->phys], kFReg32s[ir->opr1->phys], kFReg32s[ir->opr2->phys]); break; + case SZ_DOUBLE: FMUL_D(kFReg64s[ir->dst->phys], kFReg64s[ir->opr1->phys], kFReg64s[ir->opr2->phys]); break; + } + } else { + assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); + if (ir->dst->vsize <= 2 && !(ir->flag & IRF_UNSIGNED)) { + MULW(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } else { + MUL(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } + } +} + +static void ei_div(IR *ir) { + if (ir->dst->flag & VRF_FLONUM) { + switch (ir->dst->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FDIV_S(kFReg32s[ir->dst->phys], kFReg32s[ir->opr1->phys], kFReg32s[ir->opr2->phys]); break; + case SZ_DOUBLE: FDIV_D(kFReg64s[ir->dst->phys], kFReg64s[ir->opr1->phys], kFReg64s[ir->opr2->phys]); break; + } + } else { + assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); + if (ir->dst->vsize <= 2) { + if (!(ir->flag & IRF_UNSIGNED)) + DIVW(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + else + DIVUW(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } else { + if (!(ir->flag & IRF_UNSIGNED)) + DIV(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + else + DIVU(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } + } +} + +static void ei_mod(IR *ir) { + assert(!(ir->dst->flag & VRF_FLONUM)); + assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); + if (ir->dst->vsize <= 2) { + if (!(ir->flag & IRF_UNSIGNED)) + REMW(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + else + REMUW(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } else { + if (!(ir->flag & IRF_UNSIGNED)) + REM(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + else + REMU(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); + } +} + +static void ei_bitand(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + if (ir->opr2->flag & VRF_CONST) + 
ANDI(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], IM(ir->opr2->fixnum)); + else + AND(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); +} + +static void ei_bitor(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + if (ir->opr2->flag & VRF_CONST) + ORI(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], IM(ir->opr2->fixnum)); + else + OR(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); +} + +static void ei_bitxor(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + if (ir->opr2->flag & VRF_CONST) + XORI(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], IM(ir->opr2->fixnum)); + else + XOR(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); +} + +static void ei_lshift(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + if (ir->opr2->flag & VRF_CONST) + SLLI(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], IM(ir->opr2->fixnum)); + else + SLL(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys], kReg64s[ir->opr2->phys]); +} + +static void ei_rshift(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + const char *dst = kReg64s[ir->dst->phys]; + const char *opr1 = kReg64s[ir->opr1->phys]; + if (ir->opr2->flag & VRF_CONST) { + const char *opr2 = IM(ir->opr2->fixnum); + if (ir->flag & IRF_UNSIGNED) SRLI(dst, opr1, opr2); + else SRAI(dst, opr1, opr2); + } else { + const char *opr2 = kReg64s[ir->opr2->phys]; + if (ir->flag & IRF_UNSIGNED) SRL(dst, opr1, opr2); + else SRA(dst, opr1, opr2); + } +} + +static void ei_result(IR *ir) { + if (ir->opr1->flag & VRF_FLONUM) { + int dstphys = ir->dst != NULL ? ir->dst->phys : GET_FA0_INDEX(); + if (ir->opr1->phys != dstphys) { // Source is not return register. + const char **regs; + switch (ir->opr1->vsize) { + default: assert(false); // Fallthroguh + case SZ_FLOAT: regs = kFReg32s; break; + case SZ_DOUBLE: regs = kFReg64s; break; + } + FMV_D(regs[dstphys], regs[ir->opr1->phys]); + } + } else { + int dstphys = ir->dst != NULL ? ir->dst->phys : GET_A0_INDEX(); + const char *dst = kReg64s[dstphys]; + if (ir->opr1->flag & VRF_CONST) { + mov_immediate(dst, ir->opr1->fixnum, ir->flag & IRF_UNSIGNED); + } else if (ir->opr1->phys != dstphys) { // Source is not return register. 
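Assuming the standard RV64 LP64D calling convention, integer results return in a0 and floating-point results in fa0; that is why ei_result falls back to GET_A0_INDEX()/GET_FA0_INDEX() when no destination vreg is given. A trivial standalone illustration:

#include <stdio.h>

static long add3(long x) { return x + 3; }        // value comes back in a0
static double half(double x) { return x * 0.5; }  // value comes back in fa0

int main(void) {
  printf("%ld %g\n", add3(4), half(3.0));  // 7 1.5
  return 0;
}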
+ MV(dst, kReg64s[ir->opr1->phys]); + } + } +} + +static void ei_subsp(IR *ir) { + if (ir->opr1->flag & VRF_CONST) { + // assert(ir->opr1->fixnum % 16 == 0); + if (ir->opr1->fixnum > 0) + ADDI(SP, SP, IM(-ir->opr1->fixnum)); + else if (ir->opr1->fixnum < 0) + ADDI(SP, SP, IM(-ir->opr1->fixnum)); + } else { + SUB(SP, SP, kReg64s[ir->opr1->phys]); + } + if (ir->dst != NULL) + MV(kReg64s[ir->dst->phys], SP); +} + +static void ei_mov(IR *ir) { + if (ir->dst->flag & VRF_FLONUM) { + if (ir->opr1->phys != ir->dst->phys) { + const char *src, *dst; + switch (ir->dst->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: dst = kFReg32s[ir->dst->phys]; src = kFReg32s[ir->opr1->phys]; break; + case SZ_DOUBLE: dst = kFReg64s[ir->dst->phys]; src = kFReg64s[ir->opr1->phys]; break; + } + FMV_D(dst, src); + } + } else { + assert(!(ir->dst->flag & VRF_CONST)); + const char *dst = kReg64s[ir->dst->phys]; + if (ir->opr1->flag & VRF_CONST) { + mov_immediate(dst, ir->opr1->fixnum, ir->flag & IRF_UNSIGNED); + } else { + if (ir->opr1->phys != ir->dst->phys) { + MV(dst, kReg64s[ir->opr1->phys]); + } + } + } +} + +static void ei_neg(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + NEG(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys]); +} + +static void ei_bitnot(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + NOT(kReg64s[ir->dst->phys], kReg64s[ir->opr1->phys]); +} + +static void ei_cond(IR *ir) { + assert(ir->opr1 != NULL); + assert(ir->opr2 != NULL); + const char *dst = kReg64s[ir->dst->phys]; + assert(!(ir->opr1->flag & VRF_CONST)); + int cond = ir->cond.kind & (COND_MASK | COND_UNSIGNED); + + if (ir->opr1->flag & VRF_FLONUM) { + assert(ir->opr2->flag & VRF_FLONUM); + const char *o1 = kFReg64s[ir->opr1->phys]; + const char *o2 = kFReg64s[ir->opr2->phys]; + + assert(!(ir->dst->flag & VRF_FLONUM)); + const char *dst = kReg64s[ir->dst->phys]; + switch (cond) { + case COND_EQ: + case COND_NE: + switch (ir->opr1->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FEQ_S(dst, o1, o2); break; + case SZ_DOUBLE: FEQ_D(dst, o1, o2); break; + } + if (cond == COND_NE) + SEQZ(dst, dst); + break; + + case COND_GT: + { + const char *tmp = o1; + o1 = o2; + o2 = tmp; + } + // Fallthrough + case COND_LT: + switch (ir->opr1->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FLT_S(dst, o1, o2); break; + case SZ_DOUBLE: FLT_D(dst, o1, o2); break; + } + break; + + case COND_GE: + { + const char *tmp = o1; + o1 = o2; + o2 = tmp; + } + // Fallthrough + case COND_LE: + switch (ir->opr1->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FLE_S(dst, o1, o2); break; + case SZ_DOUBLE: FLE_D(dst, o1, o2); break; + } + break; + + default: assert(false); break; + } + return; + } + + const char *opr1 = kReg64s[ir->opr1->phys]; + + switch (cond) { + case COND_EQ: case COND_EQ | COND_UNSIGNED: + case COND_NE: case COND_NE | COND_UNSIGNED: + assert((ir->opr2->flag & VRF_CONST) && ir->opr2->fixnum == 0); + if ((cond & COND_MASK) == COND_EQ) + SEQZ(dst, opr1); + else + SNEZ(dst, opr1); + break; + + case COND_LT: case COND_LT | COND_UNSIGNED: + case COND_GT: case COND_GT | COND_UNSIGNED: + { + VReg *opr1 = ir->opr1, *opr2 = ir->opr2; + if ((cond & COND_MASK) == COND_GT) { + opr1 = ir->opr2; + opr2 = ir->opr1; + } + assert(!(opr1->flag & VRF_CONST)); + const char *o1 = kReg64s[opr1->phys]; + if (!(cond & COND_UNSIGNED)) { + if (opr2->flag & VRF_CONST) + SLTI(dst, o1, IM(opr2->fixnum)); + else + SLT(dst, o1, kReg64s[opr2->phys]); + } else { + if (opr2->flag & VRF_CONST) + SLTIU(dst, 
o1, IM(opr2->fixnum)); + else + SLTU(dst, o1, kReg64s[opr2->phys]); + } + } + break; + case COND_LE: case COND_LE | COND_UNSIGNED: + case COND_GE: case COND_GE | COND_UNSIGNED: + { + VReg *opr1 = ir->opr1, *opr2 = ir->opr2; + if ((cond & COND_MASK) == COND_GE) { + opr1 = ir->opr2; + opr2 = ir->opr1; + } + assert(!(opr2->flag & VRF_CONST)); + // lhs <= rhs <=> !(rhs < lhs) <=> 1 - (rhs < lhs) + const char *o2 = kReg64s[opr2->phys]; + if (!(cond & COND_UNSIGNED)) { + if (opr1->flag & VRF_CONST) + SLTI(dst, o2, IM(opr1->fixnum)); + else + SLT(dst, o2, kReg64s[opr1->phys]); + } else { + if (opr1->flag & VRF_CONST) + SLTIU(dst, o2, IM(opr1->fixnum)); + else + SLTU(dst, o2, kReg64s[opr1->phys]); + } + NEG(dst, dst); + ADDI(dst, dst, IM(1)); + } + break; + default: assert(false); break; + } +} + +static void ei_jmp(IR *ir) { + const char *label = fmt_name(ir->jmp.bb->label); + switch (ir->jmp.cond & (COND_MASK | COND_UNSIGNED)) { + case COND_ANY: J(label); return; + case COND_NONE: return; + default: break; + } + + assert(!(ir->opr1->flag & VRF_CONST)); + assert(!(ir->opr2->flag & VRF_CONST) || ir->opr2->fixnum == 0); + + const char *opr1 = kReg64s[ir->opr1->phys]; + const char *opr2 = !(ir->opr2->flag & VRF_CONST) ? kReg64s[ir->opr2->phys] : ZERO; + + // On aarch64, flag for comparing flonum is signed. + switch (ir->jmp.cond & (COND_MASK | COND_UNSIGNED)) { + case COND_EQ | COND_UNSIGNED: // Fallthrough + case COND_EQ: Bcc(CEQ, opr1, opr2, label); break; + + case COND_NE | COND_UNSIGNED: // Fallthrough + case COND_NE: Bcc(CNE, opr1, opr2, label); break; + + case COND_LT: Bcc(CLT, opr1, opr2, label); break; + case COND_GT: Bcc(CLT, opr2, opr1, label); break; + case COND_LE: Bcc(CGE, opr2, opr1, label); break; + case COND_GE: Bcc(CGE, opr1, opr2, label); break; + + case COND_LT | COND_UNSIGNED: Bcc(CLTU, opr1, opr2, label); break; + case COND_GT | COND_UNSIGNED: Bcc(CLTU, opr2, opr1, label); break; + case COND_LE | COND_UNSIGNED: Bcc(CGEU, opr2, opr1, label); break; + case COND_GE | COND_UNSIGNED: Bcc(CGEU, opr1, opr2, label); break; + default: assert(false); break; + } +} + +static void ei_tjmp(IR *ir) { + const char *dst = kTmpReg; + const Name *table_label = alloc_label(); + char *label = fmt_name(table_label); + LUI(dst, LABEL_OFFSET_HI(label)); + ADDI(dst, dst, LABEL_OFFSET_LO(label)); + // dst = label + (opr1 << 3) + assert(!(ir->opr1->flag & VRF_CONST)); + const char *opr1 = kReg64s[ir->opr1->phys]; + SLLI(opr1, opr1, IM(3)); + ADD(dst, dst, opr1); + LD(dst, IMMEDIATE_OFFSET0(dst)); + JR(dst); + + _RODATA(); + EMIT_ALIGN(8); + EMIT_LABEL(fmt_name(table_label)); + for (size_t i = 0, len = ir->tjmp.len; i < len; ++i) { + BB *bb = ir->tjmp.bbs[i]; + _QUAD(fmt("%.*s", NAMES(bb->label))); + } + _TEXT(); +} + +static void ei_precall(IR *ir) { + // Living registers are not modified between preparing function arguments, + // so safely saved before calculating argument values. 
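The alignment computed just below, (16 - stack_args_size) & 15, is the padding that keeps sp 16-byte aligned across the call; a quick standalone check of that arithmetic:

#include <stdio.h>

// Pad rounds the argument area up to a multiple of 16 bytes and is 0 when
// the size is already a multiple of 16.
int main(void) {
  for (int size = 0; size <= 32; size += 8) {
    int pad = (16 - size) & 15;
    printf("stack_args_size=%2d -> pad=%2d\n", size, pad);
  }
  return 0;
}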
+ ir->precall.caller_saves = push_caller_save_regs(ir->precall.living_pregs); + + int align_stack = (16 - (ir->precall.stack_args_size)) & 15; + ir->precall.stack_aligned = align_stack; + + if (align_stack > 0) { + SUB(SP, SP, IM(align_stack)); + } +} + +static void ei_pusharg(IR *ir) { + assert(!(ir->opr1->flag & VRF_CONST)); + if (ir->opr1->flag & VRF_FLONUM) { +#if VAARG_FP_AS_GP + if (ir->pusharg.fp_as_gp) { + switch (ir->opr1->vsize) { + case SZ_FLOAT: FMV_X_W(kReg64s[ir->pusharg.index], kFReg32s[ir->opr1->phys]); break; + case SZ_DOUBLE: FMV_X_D(kReg64s[ir->pusharg.index], kFReg64s[ir->opr1->phys]); break; + default: assert(false); break; + } + return; + } +#endif + // Assume parameter registers are arranged from index 0. + if (ir->pusharg.index != ir->opr1->phys) { + switch (ir->opr1->vsize) { + case SZ_FLOAT: FMV_D(kFReg32s[ir->pusharg.index], kFReg32s[ir->opr1->phys]); break; + case SZ_DOUBLE: FMV_D(kFReg64s[ir->pusharg.index], kFReg64s[ir->opr1->phys]); break; + default: assert(false); break; + } + } + } else { + // Assume parameter registers are arranged from index 0. + if (ir->pusharg.index != ir->opr1->phys) + MV(kReg64s[ir->pusharg.index], kReg64s[ir->opr1->phys]); + } +} + +static void ei_call(IR *ir) { + if (ir->call.label != NULL) { + char *label = fmt_name(ir->call.label); + if (ir->call.global) + label = MANGLE(label); + CALL(quote_label(label)); + } else { + assert(!(ir->opr1->flag & VRF_CONST)); + JALR(kReg64s[ir->opr1->phys]); + } + + IR *precall = ir->call.precall; + int align_stack = precall->precall.stack_aligned + precall->precall.stack_args_size; + if (align_stack != 0) { + ADD(SP, SP, IM(align_stack)); + } + + // Resore caller save registers. + pop_caller_save_regs(precall->precall.caller_saves); + + if (ir->dst != NULL) { + if (ir->dst->flag & VRF_FLONUM) { + if (ir->dst->phys != GET_FA0_INDEX()) { + FMV_D(kFReg64s[ir->dst->phys], FA0); + } + } else { + if (ir->dst->phys != GET_A0_INDEX()) { + MV(kReg64s[ir->dst->phys], kReg64s[GET_A0_INDEX()]); + } + } + } +} + +static void ei_cast(IR *ir) { + assert((ir->opr1->flag & VRF_CONST) == 0); + if (ir->dst->flag & VRF_FLONUM) { + if (ir->opr1->flag & VRF_FLONUM) { + // flonum->flonum + assert(ir->dst->vsize != ir->opr1->vsize); + // Assume flonum are just two types. 
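Further down, the flonum->fix path relies on the FCVT_W_D/FCVT_W_S macros passing an explicit "rtz" rounding mode (see riscv64.h), because C float-to-integer conversion must truncate toward zero; a standalone reminder of that semantics:

#include <stdio.h>

// Conversion truncates toward zero regardless of sign, matching fcvt.*.rtz.
int main(void) {
  double values[] = {2.7, -2.7, 0.9, -0.9};
  for (int i = 0; i < 4; ++i)
    printf("(int)%4.1f = %d\n", values[i], (int)values[i]);  // 2 -2 0 0
  return 0;
}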
+ switch (ir->dst->vsize) { + default: assert(false); // Fallthrough + case SZ_FLOAT: FCVT_S_D(kFReg32s[ir->dst->phys], kFReg64s[ir->opr1->phys]); break; + case SZ_DOUBLE: FCVT_D_S(kFReg64s[ir->dst->phys], kFReg32s[ir->opr1->phys]); break; + } + } else { + // fix->flonum + int pows = ir->opr1->vsize; + assert(0 <= pows && pows < 4); + + const char *src = kReg64s[ir->opr1->phys]; + switch (ir->dst->vsize) { + case SZ_FLOAT: + if (ir->flag & IRF_UNSIGNED) FCVT_S_WU(kFReg32s[ir->dst->phys], src); + else FCVT_S_W(kFReg32s[ir->dst->phys], src); + break; + case SZ_DOUBLE: + if (ir->flag & IRF_UNSIGNED) FCVT_D_WU(kFReg32s[ir->dst->phys], src); + else FCVT_D_W(kFReg32s[ir->dst->phys], src); + break; + default: assert(false); break; + } + } + } else if (ir->opr1->flag & VRF_FLONUM) { + // flonum->fix + switch (ir->opr1->vsize) { + case SZ_FLOAT: FCVT_W_S(kReg64s[ir->dst->phys], kFReg32s[ir->opr1->phys]); break; + case SZ_DOUBLE: FCVT_W_D(kReg64s[ir->dst->phys], kFReg64s[ir->opr1->phys]); break; + default: assert(false); break; + } + } else { + // fix->fix + assert(ir->dst->vsize != ir->opr1->vsize); + int pows = ir->opr1->vsize; + int powd = ir->dst->vsize; + assert(0 <= pows && pows < 4); + assert(0 <= powd && powd < 4); + int pow = MIN(powd, pows); + const char *dst = kReg64s[ir->dst->phys], *src = kReg64s[ir->opr1->phys]; + + if (ir->flag & IRF_UNSIGNED) { + const char *shift = IM((8 - (1 << pow)) * TARGET_CHAR_BIT); + SLLI(dst, src, shift); + SRLI(dst, dst, shift); + } else { + if (pow < 2) { + const char *shift = IM((4 - (1 << pows)) * TARGET_CHAR_BIT); + SLLIW(dst, src, shift); + SRAI(dst, dst, shift); + } else { + SEXTW(dst, src); + } + } + } +} + +static void ei_asm(IR *ir) { + EMIT_ASM(ir->asm_.str); + // if (ir->dst != NULL) { + // assert(!(ir->dst->flag & VRF_CONST)); + // int pow = ir->dst->vsize; + // assert(0 <= pow && pow < 4); + // const char **regs = kRegSizeTable[pow]; + // MOV(regs[ir->dst->phys], regs[GET_X0_INDEX()]); + // } +} + +// + +static int enum_callee_save_regs(unsigned long bit, int n, const int *indices, const char **regs, + const char **saves) { + int count = 0; + for (int i = 0; i < n; ++i) { + int ireg = indices[i]; + if (bit & (1 << ireg)) + saves[count++] = regs[ireg]; + } + return count; +} + +#define N (CALLEE_SAVE_REG_COUNT + CALLEE_SAVE_FREG_COUNT) + +int push_callee_save_regs(unsigned long used, unsigned long fused) { + const char *saves[ALIGN(N, 2)]; + int count = enum_callee_save_regs(used, CALLEE_SAVE_REG_COUNT, kCalleeSaveRegs, kReg64s, saves); + int fcount = enum_callee_save_regs(fused, CALLEE_SAVE_FREG_COUNT, kCalleeSaveFRegs, kFReg64s, + &saves[count]); + int total = count + fcount; + int total_aligned = ALIGN(total, 2); + if (total_aligned > 0) + ADDI(SP, SP, IM(-POINTER_SIZE * total_aligned)); + for (int i = 0; i < count; ++i) { + SD(saves[i], IMMEDIATE_OFFSET((total - 1 - i) * POINTER_SIZE, SP)); + } + for (int i = 0; i < fcount; ++i) { + FSD(saves[i + count], IMMEDIATE_OFFSET((total - 1 - count - i) * POINTER_SIZE, SP)); + } + return total_aligned; +} + +void pop_callee_save_regs(unsigned long used, unsigned long fused) { + const char *saves[ALIGN(N, 2)]; + int count = enum_callee_save_regs(used, CALLEE_SAVE_REG_COUNT, kCalleeSaveRegs, kReg64s, saves); + int fcount = enum_callee_save_regs(fused, CALLEE_SAVE_FREG_COUNT, kCalleeSaveFRegs, kFReg64s, + &saves[count]); + int total = count + fcount; + if (total == 0) + return; + + for (int i = fcount; i-- > 0; ) { + FLD(saves[i + count], IMMEDIATE_OFFSET((total - 1 - count - i) * POINTER_SIZE, SP)); + } 
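Both the save loop above and the restore path below reserve callee-save slots in pairs, ALIGN(total, 2) * POINTER_SIZE bytes, so sp stays 16-byte aligned; a standalone sketch of that rounding, assuming 8-byte registers (align_up is an illustrative helper):

#include <stdio.h>

static int align_up(int n, int a) { return (n + a - 1) / a * a; }

int main(void) {
  for (int total = 0; total <= 5; ++total)
    printf("%d regs -> %d slots, %d bytes\n",
           total, align_up(total, 2), align_up(total, 2) * 8);
  return 0;
}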
+ for (int i = count; i-- > 0; ) { + LD(saves[i], IMMEDIATE_OFFSET((count - 1 - i) * POINTER_SIZE, SP)); + } + ADDI(SP, SP, IM(POINTER_SIZE * ALIGN(total, 2))); +} + +int calculate_func_param_bottom(Function *func) { + const char *saves[(N + 1) & ~1]; + FuncBackend *fnbe = func->extra; + unsigned long used = fnbe->ra->used_reg_bits; //, fused = fnbe->ra->used_freg_bits; + int count = enum_callee_save_regs(used, CALLEE_SAVE_REG_COUNT, kCalleeSaveRegs, kReg64s, saves); + // int fcount = enum_callee_save_regs(fused, CALLEE_SAVE_FREG_COUNT, kCalleeSaveFRegs, kFReg64s, + // saves); + int fcount = 0; + int callee_save_count = ALIGN(count, 2) + ALIGN(fcount, 2); + + return (callee_save_count * POINTER_SIZE) + (POINTER_SIZE * 2); // Return address, saved base pointer. +} +#undef N + +inline bool is_freg(const char *reg) { + return reg[0] == 'f' && reg[1] != 'p'; +} + +static Vector *push_caller_save_regs(unsigned long living) { + Vector *saves = new_vector(); + + for (int i = 0; i < CALLER_SAVE_REG_COUNT; ++i) { + int ireg = kCallerSaveRegs[i]; + if (living & (1UL << ireg)) { + vec_push(saves, kReg64s[ireg]); + } + } + + for (int i = 0; i < CALLER_SAVE_FREG_COUNT; ++i) { + int freg = kCallerSaveFRegs[i]; + if (living & (1UL << (freg + PHYSICAL_REG_MAX))) { + // TODO: Detect register size. + vec_push(saves, kFReg64s[freg]); + } + } + + for (int i = 0, n = saves->len; i < n; ++i) { + const char *reg = saves->data[i]; + if (is_freg(reg)) + FSD(reg, IMMEDIATE_OFFSET((n - 1 - i) * POINTER_SIZE, SP)); + else + SD(reg, IMMEDIATE_OFFSET((n - 1 - i) * POINTER_SIZE, SP)); + } + + return saves; +} + +static void pop_caller_save_regs(Vector *saves) { + for (int n = saves->len, i = n; i-- > 0; ) { + const char *reg = saves->data[i]; + if (is_freg(reg)) + FLD(saves->data[i], IMMEDIATE_OFFSET((n - 1 - i) * POINTER_SIZE, SP)); + else + LD(saves->data[i], IMMEDIATE_OFFSET((n - 1 - i) * POINTER_SIZE, SP)); + } +} + +void emit_bb_irs(BBContainer *bbcon) { + typedef void (*EmitIrFunc)(IR *); + static const EmitIrFunc table[] = { + [IR_BOFS] = ei_bofs, [IR_IOFS] = ei_iofs, [IR_SOFS] = ei_sofs, + [IR_LOAD] = ei_load, [IR_LOAD_S] = ei_load_s, [IR_STORE] = ei_store, [IR_STORE_S] = ei_store_s, + [IR_ADD] = ei_add, [IR_SUB] = ei_sub, [IR_MUL] = ei_mul, [IR_DIV] = ei_div, + [IR_MOD] = ei_mod, [IR_BITAND] = ei_bitand, [IR_BITOR] = ei_bitor, + [IR_BITXOR] = ei_bitxor, [IR_LSHIFT] = ei_lshift, [IR_RSHIFT] = ei_rshift, + [IR_NEG] = ei_neg, [IR_BITNOT] = ei_bitnot, + [IR_COND] = ei_cond, [IR_JMP] = ei_jmp, [IR_TJMP] = ei_tjmp, + [IR_PRECALL] = ei_precall, [IR_PUSHARG] = ei_pusharg, [IR_CALL] = ei_call, + [IR_RESULT] = ei_result, [IR_SUBSP] = ei_subsp, [IR_CAST] = ei_cast, + [IR_MOV] = ei_mov, [IR_ASM] = ei_asm, + }; + + for (int i = 0; i < bbcon->bbs->len; ++i) { + BB *bb = bbcon->bbs->data[i]; +#ifndef NDEBUG + // Check BB connection. 
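emit_bb_irs dispatches each IR through a table of function pointers indexed by IR kind, using designated initializers so unhandled kinds stay NULL and trip the assert; a minimal standalone model of that pattern (names here are illustrative):

#include <assert.h>
#include <stdio.h>

typedef void (*Handler)(int);
enum { K_ADD, K_SUB, K_COUNT };
static void do_add(int v) { printf("add %d\n", v); }
static void do_sub(int v) { printf("sub %d\n", v); }
// Kinds without an entry stay NULL and are caught before the call.
static const Handler kTable[K_COUNT] = { [K_ADD] = do_add, [K_SUB] = do_sub };

int main(void) {
  int kinds[] = {K_SUB, K_ADD};
  for (int i = 0; i < 2; ++i) {
    assert(kTable[kinds[i]] != NULL);
    kTable[kinds[i]](i);
  }
  return 0;
}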
+ if (i < bbcon->bbs->len - 1) { + BB *nbb = bbcon->bbs->data[i + 1]; + UNUSED(nbb); + assert(bb->next == nbb); + } else { + assert(bb->next == NULL); + } +#endif + + EMIT_LABEL(fmt_name(bb->label)); + for (int j = 0; j < bb->irs->len; ++j) { + IR *ir = bb->irs->data[j]; + assert(ir->kind < (int)(sizeof(table) / sizeof(*table))); + assert(table[ir->kind] != NULL); + (*table[ir->kind])(ir); + } + } +} + +// + +static void swap_opr12(IR *ir) { + VReg *tmp = ir->opr1; + ir->opr1 = ir->opr2; + ir->opr2 = tmp; +} + +static void insert_const_mov(VReg **pvreg, RegAlloc *ra, Vector *irs, int i) { + VReg *c = *pvreg; + VReg *tmp = reg_alloc_spawn(ra, c->vsize, 0); + IR *mov = new_ir_mov(tmp, c, ((IR*)irs->data[i])->flag); + vec_insert(irs, i, mov); + *pvreg = tmp; +} + +#define insert_tmp_mov insert_const_mov + +void tweak_irs(FuncBackend *fnbe) { + UNUSED(fnbe); + + BBContainer *bbcon = fnbe->bbcon; + RegAlloc *ra = fnbe->ra; + for (int i = 0; i < bbcon->bbs->len; ++i) { + BB *bb = bbcon->bbs->data[i]; + Vector *irs = bb->irs; + for (int j = 0; j < irs->len; ++j) { + IR *ir = irs->data[j]; + switch (ir->kind) { + case IR_LOAD: + if (ir->opr1->flag & VRF_CONST) { + insert_const_mov(&ir->opr1, ra, irs, j++); + } + break; + case IR_STORE: + if (ir->opr2->flag & VRF_CONST) { + insert_const_mov(&ir->opr2, ra, irs, j++); + } + break; + case IR_ADD: + assert(!(ir->opr1->flag & VRF_CONST) || !(ir->opr2->flag & VRF_CONST)); + if (ir->opr1->flag & VRF_CONST) + swap_opr12(ir); + if (ir->opr2->flag & VRF_CONST) { + // if (ir->opr2->fixnum < 0) { + // ir->kind = IR_SUB; + // VReg *old = ir->opr2; + // ir->opr2 = reg_alloc_spawn_const(ra, -old->fixnum, old->vsize); + // ir->opr2->flag = old->flag; + // } + if (ir->opr2->fixnum > 0x0fff || ir->opr2->fixnum < -0x0fff) + insert_const_mov(&ir->opr2, ra, irs, j++); + } + break; + case IR_SUB: + assert(!(ir->opr1->flag & VRF_CONST) || !(ir->opr2->flag & VRF_CONST)); + if (ir->opr1->flag & VRF_CONST) { + if (ir->opr1->fixnum == 0) { + ir->kind = IR_NEG; + ir->opr1 = ir->opr2; + ir->opr2 = NULL; + break; + } + insert_const_mov(&ir->opr1, ra, irs, j++); + } + if (ir->opr2->flag & VRF_CONST) { + // if (ir->opr2->fixnum < 0) { + // ir->kind = IR_ADD; + // VReg *old = ir->opr2; + // ir->opr2 = reg_alloc_spawn_const(ra, -old->fixnum, old->vsize); + // ir->opr2->flag = old->flag; + // } + if (ir->opr2->fixnum > 0x0fff || ir->opr2->fixnum < -0x0fff) + insert_const_mov(&ir->opr2, ra, irs, j++); + } + break; + case IR_MUL: + case IR_DIV: + case IR_MOD: + assert(!(ir->opr1->flag & VRF_CONST) || !(ir->opr2->flag & VRF_CONST)); + if (ir->opr1->flag & VRF_CONST) + insert_const_mov(&ir->opr1, ra, irs, j++); + if (ir->opr2->flag & VRF_CONST) + insert_const_mov(&ir->opr2, ra, irs, j++); + break; + case IR_BITAND: + case IR_BITOR: + case IR_BITXOR: + assert(!(ir->opr1->flag & VRF_CONST) || !(ir->opr2->flag & VRF_CONST)); + if (ir->opr1->flag & VRF_CONST) + insert_const_mov(&ir->opr1, ra, irs, j++); + if ((ir->opr2->flag & VRF_CONST) && !is_im12(ir->opr2->fixnum)) + insert_const_mov(&ir->opr2, ra, irs, j++); + break; + case IR_LSHIFT: + case IR_RSHIFT: + assert(!(ir->opr1->flag & VRF_CONST) || !(ir->opr2->flag & VRF_CONST)); + if (ir->opr1->flag & VRF_CONST) + insert_const_mov(&ir->opr1, ra, irs, j++); + break; + case IR_COND: + { + assert(ir->opr1 != NULL); + assert(ir->opr2 != NULL); + int cond = ir->cond.kind & COND_MASK; + switch (cond) { + case COND_EQ: case COND_NE: + assert(!(ir->opr1->flag & VRF_CONST)); + if (!(ir->opr2->flag & VRF_CONST) || ir->opr2->fixnum != 0) { + IR *sub = 
new_ir_bop_raw(IR_SUB, ir->dst, ir->opr1, ir->opr2, ir->flag); + vec_insert(irs, j++, sub); + + ir->opr1 = ir->dst; + ir->opr2 = reg_alloc_spawn_const(ra, 0, ir->dst->vsize); + } + break; + case COND_LE: case COND_GT: + if (ir->opr2->flag & VRF_CONST) + insert_const_mov(&ir->opr2, ra, irs, j++); + break; + case COND_LT: case COND_GE: + if ((ir->opr2->flag & VRF_CONST) && + (ir->opr2->fixnum < -4096 || ir->opr2->fixnum > 4096)) + insert_const_mov(&ir->opr2, ra, irs, j++); + break; + default: + break; + } + } + break; + case IR_JMP: + if (ir->opr1 != NULL && ir->opr1->flag & VRF_FLONUM) { + // Cannot use fp registers as jump operands, so move it to a general register. + int c1 = ir->jmp.cond, c2 = COND_NE; + if (c1 == COND_NE) { + // No `fne` instruction, so use `feq` and negate the result. + c1 = COND_EQ; + c2 = COND_EQ; + } + + VReg *opr1 = ir->opr1, *opr2 = ir->opr2; + VReg *tmp = reg_alloc_spawn(ra, VRegSize4, 0); + IR *cond = new_ir_bop_raw(IR_COND, tmp, opr1, opr2, 0); + cond->cond.kind = c1; + + vec_insert(irs, j++, cond); + + ir->jmp.cond = c2; + ir->opr1 = tmp; + ir->opr2 = reg_alloc_spawn_const(ra, 0, VRegSize4); + } else if (ir->opr2 != NULL && + (ir->opr2->flag & VRF_CONST) && + ir->opr2->fixnum != 0) { + insert_const_mov(&ir->opr2, ra, irs, j++); + } + break; + case IR_TJMP: + // Make sure opr1 can be broken. + insert_tmp_mov(&ir->opr1, ra, irs, j++); + break; + case IR_PUSHARG: + if (ir->opr1->flag & VRF_CONST) + insert_const_mov(&ir->opr1, ra, irs, j++); + break; + + default: break; + } + } + } +} diff --git a/src/cc/arch/riscv64/riscv64.h b/src/cc/arch/riscv64/riscv64.h new file mode 100644 index 000000000..b693669e2 --- /dev/null +++ b/src/cc/arch/riscv64/riscv64.h @@ -0,0 +1,225 @@ +#pragma once + +#include + +#include "emit_util.h" + +#ifndef IM +#define IM(x) im(x) +#endif +#ifndef IMMEDIATE_OFFSET +#define IMMEDIATE_OFFSET(ofs, reg) immediate_offset(ofs, reg) +#endif +#ifndef IMMEDIATE_OFFSET0 +#define IMMEDIATE_OFFSET0(reg) immediate_offset(0, reg) +#endif +#ifndef LABEL_OFFSET_HI +#define LABEL_OFFSET_HI(label) label_offset_hi(label) +#endif +#ifndef LABEL_OFFSET_LO +#define LABEL_OFFSET_LO(label) label_offset_lo(label) +#endif +#ifndef NUM +#define NUM(x) num(x) +#endif +#ifndef HEXNUM +#define HEXNUM(x) hexnum(x) +#endif +#ifndef FLONUM +#define FLONUM(x) flonum(x) +#endif +#ifndef MANGLE +#define MANGLE(label) mangle(label) +#endif + +#define _UXTW(shift) fmt("uxtw #%d", shift) +#define _LSL(shift) fmt("lsl #%d", shift) + +#define ZERO "zero" // x0: Zero register +#define RA "ra" // x1: Return Address +#define SP "sp" // x2: Stack Pointer +#define FP "fp" // x8: Frame Pointer +#define A0 "a0" +#define A1 "a1" +#define A2 "a2" +#define A3 "a3" +#define A4 "a4" +#define A5 "a5" +#define A6 "a6" +#define A7 "a7" +#define S0 "s0" +#define S1 "s1" +#define S2 "s2" +#define S3 "s3" +#define S4 "s4" +#define S5 "s5" +#define S6 "s6" +#define S7 "s7" +#define S8 "s8" +#define S9 "s9" +#define S10 "s10" +#define S11 "s11" +#define T0 "t0" +#define T1 "t1" +#define T2 "t2" + +#define FA0 "fa0" +#define FA1 "fa1" +#define FA2 "fa2" +#define FA3 "fa3" +#define FA4 "fa4" +#define FA5 "fa5" +#define FA6 "fa6" +#define FA7 "fa7" +#define FS0 "fs0" +#define FS1 "fs1" +#define FS2 "fs2" +#define FS3 "fs3" +#define FS4 "fs4" +#define FS5 "fs5" +#define FS6 "fs6" +#define FS7 "fs7" +#define FS8 "fs8" +#define FS9 "fs9" +#define FS10 "fs10" +#define FS11 "fs11" +#define FT0 "ft0" +#define FT1 "ft1" +#define FT2 "ft2" +#define FT3 "ft3" +#define FT4 "ft4" +#define FT5 "ft5" +#define FT6 
"ft6" +#define FT7 "ft7" +#define FT8 "ft8" +#define FT9 "ft9" +#define FT10 "ft10" +#define FT11 "ft11" + +// Condition +#define CEQ "eq" +#define CNE "ne" +#define CLT "lt" +#define CGE "ge" +#define CLTU "ltu" +#define CGEU "geu" + +#define LI(o1, o2) EMIT_ASM("li", o1, o2) +#define LUI(o1, o2) EMIT_ASM("lui", o1, o2) +#define ADD(o1, o2, o3) EMIT_ASM("add", o1, o2, o3) +#define ADDI(o1, o2, o3) EMIT_ASM("addi", o1, o2, o3) +#define ADDW(o1, o2, o3) EMIT_ASM("addw", o1, o2, o3) +#define ADDIW(o1, o2, o3) EMIT_ASM("addiw", o1, o2, o3) +#define SUB(o1, o2, o3) EMIT_ASM("sub", o1, o2, o3) +#define SUBW(o1, o2, o3) EMIT_ASM("subw", o1, o2, o3) +#define MUL(o1, o2, o3) EMIT_ASM("mul", o1, o2, o3) +#define MULW(o1, o2, o3) EMIT_ASM("mulw", o1, o2, o3) +#define DIV(o1, o2, o3) EMIT_ASM("div", o1, o2, o3) +#define DIVU(o1, o2, o3) EMIT_ASM("divu", o1, o2, o3) +#define DIVW(o1, o2, o3) EMIT_ASM("divw", o1, o2, o3) +#define DIVUW(o1, o2, o3) EMIT_ASM("divuw", o1, o2, o3) +#define REM(o1, o2, o3) EMIT_ASM("rem", o1, o2, o3) +#define REMU(o1, o2, o3) EMIT_ASM("remu", o1, o2, o3) +#define REMW(o1, o2, o3) EMIT_ASM("remw", o1, o2, o3) +#define REMUW(o1, o2, o3) EMIT_ASM("remuw", o1, o2, o3) +#define AND(o1, o2, o3) EMIT_ASM("and", o1, o2, o3) +#define ANDI(o1, o2, o3) EMIT_ASM("andi", o1, o2, o3) +#define OR(o1, o2, o3) EMIT_ASM("or", o1, o2, o3) +#define ORI(o1, o2, o3) EMIT_ASM("ori", o1, o2, o3) +#define XOR(o1, o2, o3) EMIT_ASM("xor", o1, o2, o3) +#define XORI(o1, o2, o3) EMIT_ASM("xori", o1, o2, o3) +#define SLL(o1, o2, o3) EMIT_ASM("sll", o1, o2, o3) // Logical left shift +#define SLLI(o1, o2, o3) EMIT_ASM("slli", o1, o2, o3) // Logical left shift +#define SLLIW(o1, o2, o3) EMIT_ASM("slliw", o1, o2, o3) // Logical left shift, 32bit +#define SRL(o1, o2, o3) EMIT_ASM("srl", o1, o2, o3) // Logical right shift +#define SRLI(o1, o2, o3) EMIT_ASM("srli", o1, o2, o3) // Logical right shift +#define SRA(o1, o2, o3) EMIT_ASM("sra", o1, o2, o3) // Arithmetic right shift +#define SRAI(o1, o2, o3) EMIT_ASM("srai", o1, o2, o3) // Arithmetic right shift +#define J(o1) EMIT_ASM("j", o1) // => jal zero, o1 +#define JR(o1) EMIT_ASM("jr", o1) // => jalr zero, 0(o1) +#define JALR(o1) EMIT_ASM("jalr", o1) // => jalr ra, 0(o1) +#define Bcc(c, o1, o2, o3) EMIT_ASM("b" c, o1, o2, o3) +#define CALL(o1) EMIT_ASM("call", o1) +#define RET() EMIT_ASM("ret") + +#define LB(o1, o2) EMIT_ASM("lb", o1, o2) +#define LH(o1, o2) EMIT_ASM("lh", o1, o2) +#define LW(o1, o2) EMIT_ASM("lw", o1, o2) +#define LD(o1, o2) EMIT_ASM("ld", o1, o2) +#define LBU(o1, o2) EMIT_ASM("lbu", o1, o2) +#define LHU(o1, o2) EMIT_ASM("lhu", o1, o2) +#define LWU(o1, o2) EMIT_ASM("lwu", o1, o2) +#define SB(o1, o2) EMIT_ASM("sb", o1, o2) +#define SH(o1, o2) EMIT_ASM("sh", o1, o2) +#define SW(o1, o2) EMIT_ASM("sw", o1, o2) +#define SD(o1, o2) EMIT_ASM("sd", o1, o2) + +#define MV(o1, o2) EMIT_ASM("mv", o1, o2) // => addi o1, o2, 0 +#define NEG(o1, o2) EMIT_ASM("neg", o1, o2) // => sub o1, zero, o2 +#define NOT(o1, o2) EMIT_ASM("not", o1, o2) // => xori o1, o2, -1 +#define SEXTW(o1, o2) EMIT_ASM("sext.w", o1, o2) // => addiw o1, o2, 0 + +#define SEQZ(o1, o2) EMIT_ASM("seqz", o1, o2) +#define SNEZ(o1, o2) EMIT_ASM("snez", o1, o2) +#define SLTZ(o1, o2) EMIT_ASM("sltz", o1, o2) +#define SGTZ(o1, o2) EMIT_ASM("sgtz", o1, o2) +#define SLT(o1, o2, o3) EMIT_ASM("slt", o1, o2, o3) +#define SLTI(o1, o2, o3) EMIT_ASM("slti", o1, o2, o3) +#define SLTU(o1, o2, o3) EMIT_ASM("sltu", o1, o2, o3) +#define SLTIU(o1, o2, o3) EMIT_ASM("sltiu", o1, o2, o3) + +#define _BYTE(x) 
EMIT_ASM(".byte", x) +#define _WORD(x) EMIT_ASM(".short", x) // Or .hword +#define _LONG(x) EMIT_ASM(".long", x) +#define _QUAD(x) EMIT_ASM(".quad", x) +#define _FLOAT(x) EMIT_ASM(".float", x) +#define _DOUBLE(x) EMIT_ASM(".double", x) +#define _GLOBL(x) EMIT_ASM(".globl", x) +#define _COMM(x, y) EMIT_ASM(".comm", x, y) +#define _ASCII(x) EMIT_ASM(".ascii", x) +#define _SECTION(x) EMIT_ASM(".section", x) +#define _TEXT() EMIT_ASM(".text") +#define _DATA() EMIT_ASM(".data") + +#define EMIT_ALIGN(x) emit_align_p2(x) + +#define _RODATA() _SECTION(".rodata") +#define _LOCAL(x) EMIT_ASM(".local", x) + +#define _BSS(label, size, align) emit_bss(label, size, align) + + +#define FMV_D(o1, o2) EMIT_ASM("fmv.d", o1, o2) // dst <- src +#define FADD_D(o1, o2, o3) EMIT_ASM("fadd.d", o1, o2, o3) +#define FADD_S(o1, o2, o3) EMIT_ASM("fadd.s", o1, o2, o3) +#define FSUB_D(o1, o2, o3) EMIT_ASM("fsub.d", o1, o2, o3) +#define FSUB_S(o1, o2, o3) EMIT_ASM("fsub.s", o1, o2, o3) +#define FMUL_D(o1, o2, o3) EMIT_ASM("fmul.d", o1, o2, o3) +#define FMUL_S(o1, o2, o3) EMIT_ASM("fmul.s", o1, o2, o3) +#define FDIV_D(o1, o2, o3) EMIT_ASM("fdiv.d", o1, o2, o3) +#define FDIV_S(o1, o2, o3) EMIT_ASM("fdiv.s", o1, o2, o3) +#define FLD(o1, o2) EMIT_ASM("fld", o1, o2) +#define FLW(o1, o2) EMIT_ASM("flw", o1, o2) +#define FSD(o1, o2) EMIT_ASM("fsd", o1, o2) +#define FSW(o1, o2) EMIT_ASM("fsw", o1, o2) + +#define FCVT_W_D(o1, o2) EMIT_ASM("fcvt.w.d", o1, o2, "rtz") // int <- double +#define FCVT_W_S(o1, o2) EMIT_ASM("fcvt.w.s", o1, o2, "rtz") // int <- float +#define FCVT_D_W(o1, o2) EMIT_ASM("fcvt.d.w", o1, o2) // double <- int +#define FCVT_D_WU(o1, o2) EMIT_ASM("fcvt.d.wu", o1, o2) // double <- unsigned int +#define FCVT_S_W(o1, o2) EMIT_ASM("fcvt.s.w", o1, o2) // float <- int +#define FCVT_S_WU(o1, o2) EMIT_ASM("fcvt.s.wu", o1, o2) // float <- unsigned int +#define FCVT_D_S(o1, o2) EMIT_ASM("fcvt.d.s", o1, o2) // double <- float +#define FCVT_S_D(o1, o2) EMIT_ASM("fcvt.s.d", o1, o2) // float <- double + +#define FMV_X_W(o1, o2) EMIT_ASM("fmv.x.w", o1, o2) // int <- float(hex) +#define FMV_X_D(o1, o2) EMIT_ASM("fmv.x.d", o1, o2) // int <- double(hex) + +#define FEQ_D(o1, o2, o3) EMIT_ASM("feq.d", o1, o2, o3) +#define FEQ_S(o1, o2, o3) EMIT_ASM("feq.s", o1, o2, o3) +#define FLT_D(o1, o2, o3) EMIT_ASM("flt.d", o1, o2, o3) +#define FLT_S(o1, o2, o3) EMIT_ASM("flt.s", o1, o2, o3) +#define FLE_D(o1, o2, o3) EMIT_ASM("fle.d", o1, o2, o3) +#define FLE_S(o1, o2, o3) EMIT_ASM("fle.s", o1, o2, o3) + +void mov_immediate(const char *dst, int64_t value, bool is_unsigned); diff --git a/src/cc/backend/codegen.c b/src/cc/backend/codegen.c index 733d5ae41..abd09c47d 100644 --- a/src/cc/backend/codegen.c +++ b/src/cc/backend/codegen.c @@ -777,8 +777,13 @@ void alloc_stack_variables_onto_stack_frame(Function *func) { int param_offset = calculate_func_param_bottom(func); fnbe->vaarg_frame_info.offset = param_offset; - if (func->type->func.vaargs) + if (func->type->func.vaargs) { +#if VAARG_FP_AS_GP + // Register parameters are put below stack frame, so not added to frame_size. 
+#else frame_size = (MAX_REG_ARGS + MAX_FREG_ARGS) * POINTER_SIZE; +#endif + } bool require_stack_frame = false; diff --git a/src/cc/backend/codegen_expr.c b/src/cc/backend/codegen_expr.c index 866d7fb8b..1f5340c3e 100644 --- a/src/cc/backend/codegen_expr.c +++ b/src/cc/backend/codegen_expr.c @@ -53,13 +53,6 @@ void add_builtin_function(const char *str, Type *type, BuiltinFunctionProc *proc scope_add(global_scope, name, type, 0); } -inline enum ConditionKind swap_cond(enum ConditionKind cond) { - assert(COND_EQ <= cond && cond <= COND_GT); - if (cond >= COND_LT) - cond = (COND_GT + COND_LT) - cond; - return cond; -} - struct CompareExpr { enum ConditionKind cond; VReg *lhs, *rhs; @@ -454,6 +447,9 @@ static VReg *gen_funcall(Expr *expr) { int size; bool stack_arg; bool is_flo; +#if VAARG_FP_AS_GP + bool fp_as_gp; +#endif } ArgInfo; ArgInfo *arg_infos = NULL; @@ -475,6 +471,13 @@ static VReg *gen_funcall(Expr *expr) { assert(arg->type->kind != TY_ARRAY); p->size = type_size(arg->type); p->is_flo = is_flonum(arg->type); +#if VAARG_FP_AS_GP + p->fp_as_gp = false; + if (functype->func.vaargs && functype->func.params != NULL && i >= functype->func.params->len) { + p->is_flo = false; + p->fp_as_gp = true; + } +#endif p->stack_arg = is_stack_param(arg->type); #if VAARG_ON_STACK if (functype->func.vaargs && functype->func.params != NULL && i >= functype->func.params->len) @@ -524,7 +527,13 @@ static VReg *gen_funcall(Expr *expr) { ++iregarg; int index = reg_arg_count - iregarg + arg_start; assert(index < MAX_REG_ARGS); - new_ir_pusharg(vreg, index); + IR *ir = new_ir_pusharg(vreg, index); +#if !VAARG_FP_AS_GP + UNUSED(ir); +#else + if (p->fp_as_gp) + ir->pusharg.fp_as_gp = true; +#endif } } else { enum VRegSize offset_type = 2; //{.size = 4, .align = 4}; // TODO: diff --git a/src/cc/backend/ir.c b/src/cc/backend/ir.c index 3c46e865e..424119114 100644 --- a/src/cc/backend/ir.c +++ b/src/cc/backend/ir.c @@ -11,6 +11,8 @@ static enum VRegSize vtVoidPtr = VRegSize8; static enum VRegSize vtBool = VRegSize4; +inline enum ConditionKind swap_cond(enum ConditionKind cond); + // Virtual register void spill_vreg(VReg *vreg) { @@ -39,6 +41,15 @@ VReg *new_const_vreg(int64_t value, enum VRegSize vsize) { return reg_alloc_spawn_const(curra, value, vsize); } +IR *new_ir_bop_raw(enum IrKind kind, VReg *dst, VReg *opr1, VReg *opr2, int flag) { + IR *ir = new_ir(kind); + ir->flag = flag; + ir->dst = dst; + ir->opr1 = opr1; + ir->opr2 = opr2; + return ir; +} + VReg *new_ir_bop(enum IrKind kind, VReg *opr1, VReg *opr2, enum VRegSize vsize, int flag) { if (opr1->flag & VRF_CONST) { if (opr2->flag & VRF_CONST) { @@ -153,11 +164,9 @@ VReg *new_ir_bop(enum IrKind kind, VReg *opr1, VReg *opr2, enum VRegSize vsize, } } - IR *ir = new_ir(kind); - ir->flag = flag; - ir->opr1 = opr1; - ir->opr2 = opr2; - return ir->dst = reg_alloc_spawn(curra, vsize, opr1->flag & VRF_MASK); + VReg *dst = reg_alloc_spawn(curra, vsize, opr1->flag & VRF_MASK); + new_ir_bop_raw(kind, dst, opr1, opr2, flag); + return dst; } VReg *new_ir_unary(enum IrKind kind, VReg *opr, enum VRegSize vsize, int flag) { @@ -243,11 +252,15 @@ void new_ir_tjmp(VReg *val, BB **bbs, size_t len) { ir->tjmp.len = len; } -void new_ir_pusharg(VReg *vreg, int index) { +IR *new_ir_pusharg(VReg *vreg, int index) { assert(index >= 0); IR *ir = new_ir(IR_PUSHARG); ir->opr1 = vreg; ir->pusharg.index = index; +#if VAARG_FP_AS_GP + ir->pusharg.fp_as_gp = false; +#endif + return ir; } IR *new_ir_precall(int arg_count, int stack_args_size) { diff --git a/src/cc/backend/ir.h 
b/src/cc/backend/ir.h index 03775769f..5ffa152ac 100644 --- a/src/cc/backend/ir.h +++ b/src/cc/backend/ir.h @@ -108,6 +108,12 @@ enum { COND_FLONUM = 1 << 4, }; +inline enum ConditionKind swap_cond(enum ConditionKind cond) { + if (cond >= COND_LT) + cond = (COND_GT + COND_LT) - cond; + return cond; +} + #define IRF_UNSIGNED (1 << 0) typedef struct IR { @@ -146,6 +152,9 @@ typedef struct IR { } precall; struct { int index; +#if VAARG_FP_AS_GP + bool fp_as_gp; +#endif } pusharg; struct { const Name *label; @@ -164,6 +173,7 @@ typedef struct IR { VReg *new_const_vreg(int64_t value, enum VRegSize vsize); VReg *new_ir_bop(enum IrKind kind, VReg *opr1, VReg *opr2, enum VRegSize vsize, int flag); +IR *new_ir_bop_raw(enum IrKind kind, VReg *dst, VReg *opr1, VReg *opr2, int flag); VReg *new_ir_unary(enum IrKind kind, VReg *opr, enum VRegSize vsize, int flag); VReg *new_ir_load(VReg *opr, enum VRegSize vsize, int vflag, int irflag); IR *new_ir_mov(VReg *dst, VReg *src, int flag); @@ -176,7 +186,7 @@ void new_ir_jmp(BB *bb); // Non-conditional jump void new_ir_cjmp(VReg *opr1, VReg *opr2, enum ConditionKind cond, BB *bb); // Conditional jump void new_ir_tjmp(VReg *val, BB **bbs, size_t len); IR *new_ir_precall(int arg_count, int stack_args_size); -void new_ir_pusharg(VReg *vreg, int index); +IR *new_ir_pusharg(VReg *vreg, int index); VReg *new_ir_call(const Name *label, bool global, VReg *freg, int total_arg_count, int reg_arg_count, enum VRegSize result_size, int result_flag, IR *precall, VReg **args, int vaarg_start); diff --git a/src/cc/builtin.c b/src/cc/builtin.c index 811dc7f57..4e4dcd7e2 100644 --- a/src/cc/builtin.c +++ b/src/cc/builtin.c @@ -48,6 +48,46 @@ static VReg *gen_builtin_va_start(Expr *expr) { new_ir_mov(varinfo->local.vreg, ptr, IRF_UNSIGNED); return NULL; } +#elif XCC_TARGET_ARCH == XCC_ARCH_RISCV64 +static VReg *gen_builtin_va_start(Expr *expr) { + assert(expr->kind == EX_FUNCALL); + Vector *args = expr->funcall.args; + assert(args->len == 2); + assert(curfunc != NULL); + Expr *var = strip_cast(args->data[1]); + if (var->kind == EX_REF) + var = var->unary.sub; + int gn = -1; + if (var->kind == EX_VAR) { + const Vector *params = curfunc->params; + int g = 0; + for (int i = 0; i < params->len; ++i) { + VarInfo *info = params->data[i]; + const Type *t = info->type; + if (t->kind != TY_STRUCT) { + ++g; + } + + if (info->name != NULL && equal_name(info->name, var->var.name)) { + gn = g; + break; + } + } + } + if (gn < 0) { + parse_error(PE_FATAL, var->token, "Must be function argument"); + return NULL; + } + + FuncBackend *fnbe = curfunc->extra; + FrameInfo *fi = &fnbe->vaarg_frame_info; + VReg *p = new_ir_bofs(fi); + + // (void)(ap = fp + ) + VReg *ap = gen_expr(args->data[0]); + new_ir_mov(ap, p, IRF_UNSIGNED); + return NULL; +} #else static VReg *gen_builtin_va_start(Expr *expr) { assert(expr->kind == EX_FUNCALL); @@ -146,7 +186,7 @@ void install_builtins(void) { add_builtin_expr_ident("__builtin_type_kind", &p_reg_class); { -#if VAARG_ON_STACK +#if VAARG_ON_STACK || XCC_TARGET_ARCH == XCC_ARCH_RISCV64 Type *tyVaList = ptrof(&tyVoidPtr); #else Type *tyVaElem = create_struct_type(NULL, alloc_name("__va_elem", NULL, false), 0); diff --git a/src/cc/frontend/fe_misc.c b/src/cc/frontend/fe_misc.c index 786e1afec..dcccfbb10 100644 --- a/src/cc/frontend/fe_misc.c +++ b/src/cc/frontend/fe_misc.c @@ -699,7 +699,10 @@ Expr *extract_bitfield_value(Expr *src, const MemberInfo *minfo) { tmp = new_expr_bop(EX_BITAND, tmp->type, tmp->token, tmp, new_expr_fixlit(tmp->type, tmp->token, mask)); } 
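  /* Editorial note, not part of the patch: the RV64 branch selected just below
     widens MINREGSIZE to 8 bytes, presumably because this backend performs the
     sign-extending shift pair on full 64-bit registers, so the shift amounts
     have to be computed against a 64-bit width rather than the 32-bit width
     used for the AArch64/WASM case. */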
else { -#if XCC_TARGET_ARCH == XCC_ARCH_AARCH64 || XCC_TARGET_ARCH == XCC_ARCH_WASM +#if XCC_TARGET_ARCH == XCC_ARCH_RISCV64 + const unsigned int MINREGSIZE = 8; + int w = MAX(type_size(type), MINREGSIZE) * TARGET_CHAR_BIT; +#elif XCC_TARGET_ARCH == XCC_ARCH_AARCH64 || XCC_TARGET_ARCH == XCC_ARCH_WASM const unsigned int MINREGSIZE = 4; int w = MAX(type_size(type), MINREGSIZE) * TARGET_CHAR_BIT; #else diff --git a/src/config.h b/src/config.h index 9d16f321e..2bb90e875 100644 --- a/src/config.h +++ b/src/config.h @@ -61,6 +61,11 @@ #define VAARG_ON_STACK 1 #endif +#if !defined(VAARG_FP_AS_GP) && XCC_TARGET_ARCH == XCC_ARCH_RISCV64 +// Pass floating-point arguments in general-purpose registers for variadic arguments. +#define VAARG_FP_AS_GP 1 +#endif + #if !defined(MANGLE_PREFIX) && XCC_TARGET_PLATFORM == XCC_PLATFORM_APPLE #define MANGLE_PREFIX "_" #endif diff --git a/tests/Makefile b/tests/Makefile index 4ffed3f40..66000ad43 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -71,15 +71,15 @@ test-print-type: print_type_test .PHONY: test-val test-val: valtest @echo '## valtest' - @./valtest + @$(RUN_EXE) ./valtest .PHONY: test-dval, test-fval test-dval: dvaltest @echo '## dvaltest' - @./dvaltest + @$(RUN_EXE) ./dvaltest test-fval: fvaltest @echo '## fvaltest' - @./fvaltest + @$(RUN_EXE) ./fvaltest .PHONY: test-cpp test-cpp: # $(CPP) @@ -89,18 +89,24 @@ test-cpp: # $(CPP) .PHONY: test-sh test-sh: # $(XCC) @echo '## test.sh' - @XCC="$(XCC)" ./test.sh + @$(eval AOUT := ./$(shell basename `mktemp -u`)) + @XCC="$(XCC)" RUN_AOUT="$(RUN_EXE) $(AOUT)" AOUT=$(AOUT) ./test.sh @rm -f core .PHONY: test-examples test-examples: # $(XCC) @echo '## Example test' - @XCC="$(XCC)" ./example_test.sh + @XCC="$(XCC)" RUN_EXE="$(RUN_EXE)" ./example_test.sh .PHONY: test-link +ifeq ("$(NO_LINK_TEST)", "") test-link: link_test # $(XCC) @echo '## Link test' - @./link_test + @$(RUN_EXE) ./link_test +else +test-link: + @echo '## Link test: skip' +endif INITIALIZER_SRCS:=initializer_test.c $(CC1_FE_DIR)/parser.c $(CC1_FE_DIR)/parser_expr.c $(CC1_FE_DIR)/lexer.c \ $(CC1_FE_DIR)/initializer.c $(CC1_FE_DIR)/fe_misc.c $(CC1_FE_DIR)/var.c $(CC1_FE_DIR)/type.c $(CC1_FE_DIR)/ast.c $(UTIL_DIR)/util.c $(UTIL_DIR)/table.c \ diff --git a/tests/example_test.sh b/tests/example_test.sh index 43cb797e0..4cc6f73f9 100755 --- a/tests/example_test.sh +++ b/tests/example_test.sh @@ -4,6 +4,7 @@ source ./test_sub.sh AOUT=${AOUT:-$(basename "$(mktemp -u)")} XCC=${XCC:-../xcc} +# RUN_EXE=${RUN_EXE:-} try() { local title="$1" @@ -19,7 +20,7 @@ try() { declare -a args=( "$@" ) local actual - actual=$(./"$AOUT" "${args[@]:3}") > /dev/null 2>&1 || { + actual=$(${RUN_EXE} ./"$AOUT" "${args[@]:3}") > /dev/null 2>&1 || { end_test 'Exec failed' return } @@ -42,7 +43,7 @@ try_cmp() { } declare -a args=( "$@" ) - ./"$AOUT" "${args[@]:4}" > /dev/null 2>&1 || { + ${RUN_EXE} ./"$AOUT" "${args[@]:4}" > /dev/null 2>&1 || { end_test 'Exec failed' return } @@ -53,7 +54,7 @@ try_cmp() { no_flonum() { echo -e "#include \nint main(){\n#ifdef __NO_FLONUM\nputs(\"true\");\n#endif\nreturn 0;}" > tmp.c - $XCC tmp.c && ./a.out || exit 1 + $XCC tmp.c && ${RUN_EXE} ./a.out || exit 1 } test_all() { diff --git a/tool/run-riscv64 b/tool/run-riscv64 new file mode 100755 index 000000000..1b657191c --- /dev/null +++ b/tool/run-riscv64 @@ -0,0 +1,7 @@ +#!/bin/bash + +# Execute RISC-V executable on Spike + +# Eliminate "bbl loader" message +spike $RISCV/riscv64-unknown-elf/bin/pk $@ | tail -n +2 +exit ${PIPESTATUS[0]}
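Editorial appendix, not part of the patch. tool/run-riscv64 expects $RISCV to point at a toolchain installation that provides riscv64-unknown-elf/bin/pk; the `tail -n +2` drops pk's "bbl loader" banner and ${PIPESTATUS[0]} preserves the test program's real exit status despite the pipe.

The variadic pieces above (the VAARG_FP_AS_GP config flag, the riscv64 __builtin_va_start lowering in builtin.c, and the fp_as_gp marking on IR_PUSHARG) all serve one situation: a floating-point value passed through `...` travels in an integer argument register and is read back out of the GP spill area. A minimal, self-contained sketch of a program that exercises that path -- ordinary C with made-up names, runnable under tool/run-riscv64 once built for riscv64 -- might look like:

#include <stdarg.h>
#include <stdio.h>

// Sums `n` variadic doubles; with VAARG_FP_AS_GP the doubles arrive in the
// integer argument registers (or on the stack) and va_arg walks the GP spill
// area one pointer-sized slot at a time.
static double sum_doubles(int n, ...) {
  va_list ap;
  va_start(ap, n);
  double total = 0.0;
  for (int i = 0; i < n; ++i)
    total += va_arg(ap, double);
  va_end(ap);
  return total;
}

int main(void) {
  printf("%f\n", sum_doubles(3, 1.0, 2.5, 3.25));  // expected output: 6.750000
  return 0;
}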